├── .drone.yml ├── .gitattributes ├── .gitignore ├── .gitmodules ├── .travis.yml ├── DEVELOPMENT.rst ├── Doxyfile ├── MANIFEST.in ├── README.rst ├── SConstruct ├── doc-requirements.txt ├── docs ├── Makefile ├── build.sh ├── notebooks └── source │ ├── _images │ ├── BuildingHypergraph_14_0.png │ ├── BuildingHypergraph_7_0.png │ ├── EditDistance.png │ ├── EditDistance_14_0.png │ ├── EditDistance_7_0.png │ ├── EditDistance_9_0.png │ ├── Fibonacci.png │ ├── Fibonacci_5_0.png │ ├── decipher.png │ ├── decipher_15_0.png │ ├── decipher_17_0.png │ ├── decipher_28_0.png │ ├── decipher_7_0.png │ ├── hmm.png │ ├── hmm_11_0.png │ ├── hmm_13_0.png │ ├── hmm_14_0.png │ ├── hmm_15_0.png │ ├── hmm_16_0.png │ ├── hmm_17_0.png │ ├── hmm_21_0.png │ ├── hmm_23_0.png │ ├── hmm_24_0.png │ ├── hmm_25_0.png │ ├── hmm_27_0.png │ ├── hmm_28_0.png │ ├── hmm_29_0.png │ ├── hmm_30_0.png │ ├── hmm_31_0.png │ ├── hmm_33_0.png │ ├── hmm_34_0.png │ ├── hmm_35_0.png │ ├── hmm_37_0.png │ ├── hmm_40_0.png │ ├── hypergraphs_4_0.png │ ├── hypergraphs_5_0.png │ ├── parsing.png │ ├── parsing_12_0.png │ ├── parsing_4_0.png │ ├── parsing_9_0.png │ ├── parsing_9_1.png │ ├── tutorial_16_0.png │ ├── tutorial_20_0.png │ ├── tutorial_22_0.png │ ├── tutorial_23_0.png │ ├── tutorial_27_0.png │ ├── tutorial_7_0.png │ └── tutorial_9_0.png │ ├── _templates │ └── class.rst │ ├── api.rst │ ├── conf.py │ ├── development.rst │ ├── examples.rst │ ├── extensions.rst │ ├── full.bib │ ├── gallery.rst │ ├── hypergraph.rst │ ├── index.rst │ ├── nlp.rst │ ├── notebooks │ └── setup.rst ├── notebooks ├── BeamSearch.ipynb ├── BeamSearch.rst ├── BeamSearch_files │ ├── BeamSearch_2_0.text │ ├── BeamSearch_4_0.text │ ├── BeamSearch_5_0.png │ ├── BeamSearch_5_0.text │ └── BeamSearch_8_0.text ├── BuildingHypergraph.ipynb ├── BuildingHypergraph.rst ├── BuildingHypergraph_files │ ├── BuildingHypergraph_10_0.text │ ├── BuildingHypergraph_13_0.text │ ├── BuildingHypergraph_14_0.png │ ├── BuildingHypergraph_14_0.text │ ├── 
BuildingHypergraph_7_0.png │ └── BuildingHypergraph_7_0.text ├── Debugging.ipynb ├── Debugging.rst ├── EditDistance.ipynb ├── EditDistance.rst ├── EditDistance_files │ ├── EditDistance_10_0.text │ ├── EditDistance_11_0.text │ ├── EditDistance_12_0.png │ ├── EditDistance_12_0.text │ ├── EditDistance_13_0.png │ ├── EditDistance_13_0.text │ ├── EditDistance_15_0.png │ ├── EditDistance_15_0.text │ ├── EditDistance_16_0.text │ ├── EditDistance_17_0.png │ ├── EditDistance_17_0.text │ ├── EditDistance_18_0.png │ ├── EditDistance_18_0.text │ ├── EditDistance_19_0.png │ ├── EditDistance_19_0.text │ ├── EditDistance_20_0.png │ ├── EditDistance_20_0.text │ ├── EditDistance_21_0.png │ ├── EditDistance_21_0.text │ ├── EditDistance_22_0.png │ ├── EditDistance_22_0.text │ ├── EditDistance_23_0.png │ ├── EditDistance_23_0.text │ ├── EditDistance_24_0.text │ ├── EditDistance_25_0.png │ ├── EditDistance_25_0.text │ ├── EditDistance_26_0.png │ ├── EditDistance_26_0.text │ ├── EditDistance_27_0.png │ ├── EditDistance_27_0.text │ ├── EditDistance_29_0.png │ ├── EditDistance_29_0.text │ ├── EditDistance_6_0.text │ ├── EditDistance_8_0.text │ └── EditDistance_9_0.text ├── Fibonacci.ipynb ├── Fibonacci.rst ├── Fibonacci_files │ ├── Fibonacci_10_0.text │ ├── Fibonacci_12_0.png │ ├── Fibonacci_12_0.text │ ├── Fibonacci_14_0.png │ ├── Fibonacci_14_0.text │ ├── Fibonacci_15_0.png │ ├── Fibonacci_15_0.text │ ├── Fibonacci_4_0.text │ └── Fibonacci_5_0.text ├── Lattice.ipynb ├── Lattice.rst ├── Lattice_files │ ├── Lattice_23_0.text │ ├── Lattice_3_0.text │ └── Lattice_6_0.text ├── PerceptronTagger.ipynb ├── PerceptronTagger.rst ├── PerceptronTagger_files │ ├── PerceptronTagger_11_0.text │ ├── PerceptronTagger_11_1.text │ ├── PerceptronTagger_17_0.text │ ├── PerceptronTagger_1_0.text │ ├── PerceptronTagger_4_0.text │ ├── PerceptronTagger_6_0.text │ ├── PerceptronTagger_7_0.text │ ├── PerceptronTagger_7_1.text │ └── PerceptronTagger_9_0.text ├── conftest.py ├── decipher.ipynb ├── decipher.rst ├── 
decipher_files │ ├── decipher_12_0.text │ ├── decipher_13_0.text │ ├── decipher_15_0.png │ ├── decipher_15_0.text │ ├── decipher_16_0.text │ ├── decipher_16_1.text │ ├── decipher_21_0.text │ ├── decipher_22_0.text │ ├── decipher_22_1.text │ ├── decipher_7_0.png │ └── decipher_7_0.text ├── doc │ ├── ChartBuilder.ipynb │ ├── ChartBuilder.rst │ ├── ChartBuilder_files │ │ ├── ChartBuilder_10_0.png │ │ ├── ChartBuilder_10_0.text │ │ ├── ChartBuilder_11_0.png │ │ ├── ChartBuilder_11_0.text │ │ ├── ChartBuilder_12_0.png │ │ ├── ChartBuilder_12_0.text │ │ ├── ChartBuilder_13_0.png │ │ ├── ChartBuilder_13_0.text │ │ ├── ChartBuilder_14_0.png │ │ ├── ChartBuilder_14_0.text │ │ ├── ChartBuilder_15_0.png │ │ ├── ChartBuilder_15_0.text │ │ ├── ChartBuilder_16_0.png │ │ ├── ChartBuilder_16_0.text │ │ ├── ChartBuilder_6_0.png │ │ ├── ChartBuilder_6_0.text │ │ ├── ChartBuilder_7_0.png │ │ ├── ChartBuilder_7_0.text │ │ ├── ChartBuilder_8_0.png │ │ ├── ChartBuilder_8_0.text │ │ ├── ChartBuilder_9_0.png │ │ └── ChartBuilder_9_0.text │ ├── Hypergraph.ipynb │ ├── Hypergraph.rst │ ├── Hypergraph_files │ │ └── Hypergraph_8_0.text │ ├── StructuredEncoder.ipynb │ ├── StructuredEncoder.rst │ ├── StructuredEncoder_files │ │ ├── StructuredEncoder_6_0.text │ │ ├── StructuredEncoder_7_0.text │ │ ├── StructuredEncoder_8_0.text │ │ └── StructuredEncoder_9_0.text │ ├── Weights.ipynb │ ├── Weights.rst │ ├── best_path.ipynb │ ├── best_path.rst │ ├── best_path_files │ │ ├── best_path_2_0.text │ │ ├── best_path_3_0.text │ │ ├── best_path_4_0.png │ │ ├── best_path_4_0.text │ │ ├── best_path_5_0.png │ │ └── best_path_5_0.text │ ├── binarize.ipynb │ ├── binarize.rst │ ├── binarize_files │ │ ├── binarize_5_0.png │ │ ├── binarize_5_0.text │ │ ├── binarize_7_0.png │ │ └── binarize_7_0.text │ ├── cfg.ipynb │ ├── cfg.rst │ ├── conftest.py │ ├── draw.ipynb │ ├── draw.rst │ ├── draw_files │ │ ├── draw_3_0.png │ │ ├── draw_3_0.text │ │ ├── draw_4_0.png │ │ ├── draw_4_0.text │ │ ├── draw_6_0.png │ │ └── 
draw_6_0.text │ ├── eisner.ipynb │ ├── eisner.rst │ ├── eisner_files │ │ ├── eisner_4_0.png │ │ ├── eisner_4_0.text │ │ └── eisner_5_0.text │ ├── filter.ipynb │ ├── filter.rst │ ├── filter_files │ │ ├── filter_4_0.png │ │ ├── filter_4_0.text │ │ ├── filter_6_0.png │ │ └── filter_6_0.text │ ├── inside.ipynb │ ├── inside.rst │ ├── inside_files │ │ ├── inside_4_0.png │ │ ├── inside_4_0.text │ │ ├── inside_5_0.png │ │ ├── inside_5_0.text │ │ ├── inside_6_0.png │ │ ├── inside_6_0.text │ │ ├── inside_7_0.png │ │ └── inside_7_0.text │ ├── intersect.ipynb │ ├── intersect.rst │ ├── kbest.ipynb │ ├── kbest.rst │ ├── kbest_files │ │ ├── kbest_4_0.png │ │ ├── kbest_4_0.text │ │ ├── kbest_5_0.text │ │ ├── kbest_6_0.png │ │ └── kbest_6_0.text │ ├── lp.ipynb │ ├── lp.rst │ ├── lp_files │ │ ├── lp_5_0.png │ │ ├── lp_5_0.text │ │ ├── lp_6_0.png │ │ ├── lp_6_0.text │ │ └── lp_7_0.text │ ├── marginals.ipynb │ ├── marginals.rst │ ├── outside.ipynb │ ├── outside.rst │ ├── outside_files │ │ ├── outside_10_0.png │ │ ├── outside_10_0.text │ │ ├── outside_11_0.png │ │ ├── outside_11_0.text │ │ ├── outside_4_0.png │ │ ├── outside_4_0.text │ │ ├── outside_5_0.png │ │ ├── outside_5_0.text │ │ ├── outside_6_0.png │ │ ├── outside_6_0.text │ │ ├── outside_7_0.png │ │ ├── outside_7_0.text │ │ ├── outside_8_0.png │ │ └── outside_8_0.text │ ├── semimarkov.ipynb │ ├── semimarkov.rst │ ├── tagger.ipynb │ ├── tagger.rst │ ├── tagger_files │ │ ├── tagger_4_0.text │ │ ├── tagger_5_0.png │ │ └── tagger_5_0.text │ ├── training.ipynb │ ├── training.rst │ ├── training_files │ │ ├── training_7_0.text │ │ └── training_8_0.text │ ├── transform.ipynb │ ├── transform.rst │ └── transform_files │ │ ├── transform_5_0.png │ │ ├── transform_5_0.text │ │ ├── transform_6_0.png │ │ ├── transform_6_0.text │ │ └── transform_7_0.text ├── fill.ipynb ├── fill.rst ├── fill_files │ └── fill_3_0.text ├── hmm.ipynb ├── hmm.rst ├── hmm_files │ ├── hmm_11_0.text │ ├── hmm_12_0.text │ ├── hmm_13_0.png │ ├── hmm_13_0.text │ ├── 
hmm_14_0.png │ ├── hmm_14_0.text │ ├── hmm_15_0.text │ ├── hmm_4_0.text │ ├── hmm_8_0.png │ ├── hmm_8_0.text │ ├── hmm_9_0.png │ └── hmm_9_0.text ├── hypergraphs.ipynb ├── hypergraphs.rst ├── hypergraphs_files │ ├── hypergraphs_4_0.png │ ├── hypergraphs_4_0.text │ ├── hypergraphs_5_0.png │ └── hypergraphs_5_0.text ├── images │ ├── EditDistance.png │ ├── Fibonacci.png │ ├── decipher.png │ ├── graph.png │ ├── hmm.png │ ├── hmm_30_0.png │ ├── parse_hypergraph.png │ ├── parse_hypergraph_no_path.png │ ├── parsing.png │ ├── single_edge.png │ ├── trap-left.png │ ├── trap-right.png │ ├── triangle-left.png │ ├── triangle-right.png │ └── triangle.png ├── index.rst ├── parsing.ipynb ├── parsing.rst ├── parsing_files │ ├── parsing_10_0.png │ ├── parsing_10_0.text │ ├── parsing_11_0.png │ ├── parsing_11_0.text │ ├── parsing_4_0.text │ ├── parsing_5_0.text │ ├── parsing_6_0.text │ ├── parsing_7_0.png │ ├── parsing_7_0.text │ ├── parsing_8_0.png │ ├── parsing_8_0.text │ └── parsing_9_0.text ├── phrase_based.ipynb ├── phrase_based.rst ├── phrase_based_files │ ├── phrase_based_3_0.png │ ├── phrase_based_3_0.text │ ├── phrase_based_7_0.text │ └── phrase_based_9_0.text ├── sequence_crf.ipynb ├── sequence_crf.rst └── sequence_crf_files │ ├── sequence_crf_10_0.text │ ├── sequence_crf_7_0.text │ ├── sequence_crf_8_0.text │ └── sequence_crf_9_0.text ├── optional-requirements.txt ├── python └── pydecode │ ├── __init__.py │ ├── _pydecode.cpp │ ├── _pydecode.pxd │ ├── _pydecode.pyx │ ├── constraints.py │ ├── display.py │ ├── encoder.py │ ├── ext │ ├── __init__.py │ ├── factorgraph.py │ └── lp_gurobi.py │ ├── linear_program.py │ ├── model.py │ ├── nlp │ ├── __init__.py │ ├── cfg.py │ ├── cfg_tests.py │ ├── decoding.py │ ├── decoding_tests.py │ ├── dep_train.py │ ├── dependency_parsing.py │ ├── dependency_parsing_tests.py │ ├── format.py │ ├── multinomial.py │ ├── permutation.py │ ├── phrase_based.py │ ├── speed_tests.py │ ├── tagging.py │ └── tagging_tests.py │ ├── optimization.py │ ├── 
potentials.so │ ├── templates │ ├── algorithms.pxd.tpl │ ├── algorithms.pyx.tpl │ ├── beam.pxd.tpl │ ├── beam.pyx.tpl │ ├── beam.yaml │ ├── chart.pxd │ ├── chart.pyx │ ├── cython_jinja.py │ ├── extensions.pxd │ ├── extensions.pyx │ ├── libhypergraph.pxd │ ├── libhypergraph.pyx │ └── weights.yaml │ └── test │ ├── __init__.py │ ├── algorithm_tests.py │ ├── chart_tests.py │ ├── hypergraph_tests.py │ ├── transform_tests.py │ └── utils.py ├── requirements.txt ├── setup.cfg ├── setup.py └── src ├── Hypergraph ├── Algorithms.cpp ├── Algorithms.hh ├── Automaton.hh ├── BeamSearch.cpp ├── BeamSearch.hh ├── Hypergraph.cpp ├── Hypergraph.hh ├── Potentials.cpp ├── Potentials.hh ├── SemiringAlgorithms.cpp ├── SemiringAlgorithms.h ├── SemiringAlgorithms.hh ├── Semirings.cpp └── Semirings.hh ├── README.rst ├── SConscript ├── Tests.cpp ├── common.h └── foreach.h /.drone.yml: -------------------------------------------------------------------------------- 1 | image: scivm/scientific-python-2.7 2 | env: 3 | - PYTHONPATH=/var/cache/drone 4 | script: 5 | - source /opt/ve/bin/activate 6 | - ls . 
7 | - apt-get install -y glpk libboost-dev 8 | - pip install pulp 9 | - python setup.py config 10 | - python setup.py build 11 | - python setup.py install 12 | - nosetests python/pydecode/test/ 13 | - apt-get install -y pkg-config graphviz graphviz-dev libblas-dev python-cvxopt 14 | - pip install pulp networkx pystruct matplotlib pytest pandas pygraphviz cvxopt scikit-learn 15 | - py.test notebooks 16 | notify: 17 | email: 18 | recipients: 19 | - sasha.rush@gmail.com -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | potentials.so binary -diff 2 | potentials.cpp binary -diff 3 | potentials.pyx binary -diff 4 | potentials.pxd binary -diff 5 | *.xml binary -diff 6 | docs/ext/* binary -diff 7 | docs/xml/* binary -diff 8 | docs/src/generated/* binary -diff 9 | notebooks/*.ipynb binary -diff 10 | notebooks/doc/*.ipynb binary -diff 11 | python/pydecode/_pydecode.cpp binary -diff 12 | python/pydecode/_pydecode.pyx binary -diff -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by http://gitignore.io 2 | 3 | ### Python ### 4 | *.py[cod] 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Packages 10 | *.egg 11 | *.egg-info 12 | dist 13 | build 14 | eggs 15 | parts 16 | bin 17 | var 18 | sdist 19 | develop-eggs 20 | .installed.cfg 21 | lib 22 | lib64 23 | __pycache__ 24 | 25 | # Installer logs 26 | pip-log.txt 27 | 28 | # Unit test / coverage reports 29 | .coverage 30 | .tox 31 | nosetests.xml 32 | 33 | # Translations 34 | *.mo 35 | 36 | # Mr Developer 37 | .mr.developer.cfg 38 | .project 39 | .pydevproject 40 | 41 | ### C++ ### 42 | # Compiled Object files 43 | *.slo 44 | *.lo 45 | *.o 46 | 47 | # Compiled Dynamic libraries 48 | *.so 49 | *.dylib 50 | 51 | # Compiled Static libraries 52 | *.lai 53 | *.la 54 | *.a 55 | 
56 | ### Linux ### 57 | .* 58 | !.gitignore 59 | !.git* 60 | *~ 61 | G* 62 | build_config.py 63 | 64 | ## Sublime files ## 65 | *.sublime-project 66 | *.sublime-workspace 67 | 68 | *.log 69 | *.out 70 | *.idx 71 | *.aux 72 | *.nav 73 | *.syntex.gz 74 | *.toc 75 | *.vrb 76 | *.synctex.gz 77 | *.snm 78 | 79 | generated/ 80 | auto/ 81 | 82 | *_flymake.h 83 | notebooks/*.py 84 | MANIFEST 85 | *pstats 86 | docs/html/ 87 | docs/latex/ 88 | pydecode-*/ 89 | venv/ 90 | ENV/ 91 | *.old 92 | todo 93 | *flymake* -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "numpydoc"] 2 | path = numpydoc 3 | url = https://github.com/numpy/numpydoc 4 | [submodule "docs/ext/numpydoc_base"] 5 | path = docs/ext/numpydoc_base 6 | url = https://github.com/numpy/numpydoc 7 | [submodule "docs/ext/breathe_base"] 8 | path = docs/ext/breathe_base 9 | url = https://github.com/michaeljones/breathe.git 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | virtualenv: 3 | system_site_packages: true 4 | python: 5 | - "2.7" 6 | # command to install dependencies 7 | before_install: 8 | - sudo apt-get update -qq 9 | - sudo apt-get install python-scipy python-cvxopt graphviz cython glpk libboost-dev 10 | install: 11 | - pip install -r requirements.txt -r optional-requirements.txt --use-mirrors 12 | - pip install . 13 | # command to run tests 14 | script: 15 | - nosetests python/pydecode 16 | - cd python; py.test ../notebooks -------------------------------------------------------------------------------- /DEVELOPMENT.rst: -------------------------------------------------------------------------------- 1 | To contribute to development please email srush at csail mit edu or send a pull request. 
2 | 3 | Build Commands 4 | =============== 5 | 6 | Run to build a debug, profile and optimized version of the libdecode library :: 7 | 8 | > scons 9 | 10 | 11 | Run to build the C++ and python documentation. The index page is in "docs/build/index.html" :: 12 | 13 | > scons docs 14 | 15 | 16 | Builds and runs the C++ tests. (Requires gtest). :: 17 | 18 | > scons test 19 | 20 | Builds and runs the python lib. (Requires cython, jinja). :: 21 | 22 | > scons pylib 23 | 24 | Runs the python tests. (Requires py.test and nosetests). :: 25 | 26 | > scons pytest 27 | 28 | Runs a C++ lint check. Uses the Google C++ style guide. :: 29 | 30 | > bash script/check_syntax.sh 31 | 32 | Runs a python lint check. (Requires pep8) :: 33 | 34 | > bash script/check_python_syntax.sh 35 | 36 | Builds a clean version of the library and Cython code. :: 37 | 38 | > bash script/build.sh 39 | 40 | Build (and install) the python extension. :: 41 | 42 | > python setup.py config 43 | > python setup.py build 44 | > python setup.py build_ext --inplace 45 | > sudo python setup.py install 46 | 47 | Profiling 48 | 49 | > python -m profile -o output.pstats test.py 50 | > python ~/Downloads/gprof2dot.py -f pstats output.pstats | dot -Tpng -o output.png 51 | 52 | Profiling C++ 53 | 54 | > python -m yep run_c2f.py 55 | > google-pprof -gv potentials.so run_c2f.py.prof 56 | 57 | 58 | Documentation 59 | ============= 60 | 61 | C++ indoc documentation is in Doxygen format. 62 | 63 | http://www.stack.nl/~dimitri/doxygen/manual/docblocks.html 64 | 65 | Python indoc documentation is in numpydoc format. 66 | 67 | https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt 68 | 69 | 70 | Directories 71 | ============ 72 | 73 | src/ - C++ library code. 74 | 75 | python/ - Python library code. 76 | 77 | notebooks/ - IPython notebooks (Included as documentation). 78 | 79 | docs/ - Sphinx documentation. 80 | 81 | scripts/ - misc. scripts 82 | 83 | writing/ - misc. associated documents. 
84 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include src *.cpp *.h *.hh 2 | recursive-include python *.cpp *.h *.hh 3 | include *.txt 4 | recursive-include notebooks *.ipynb -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | PyDecode is a dynamic programming toolkit developed for research in natural language processing. Its aim is to be simple enough for fast prototyping, but efficient enough for research use. 3 | 4 | 5 | .. _documentation: http://pydecode.readthedocs.org/ 6 | 7 | 8 | .. image:: _images/parsing_9_0.png 9 | :width: 500 px 10 | :align: center 11 | 12 | | 13 | | 14 | 15 | 16 | Features 17 | -------- 18 | 19 | * **Simple specifications.** Dynamic programming algorithms specified through pseudo-code. :: 20 | 21 | # Viterbi algorithm. 22 | ... 23 | c.init(items[0, :]) 24 | for i in range(1, n): 25 | for t in range(len(tags)): 26 | c.set(items[i, t], 27 | items[i-1, :], 28 | labels=labels[i, t, :]) 29 | graph = c.finish() 30 | 31 | * **Efficient implementation.** Core code in C++, python interfaces through numpy. :: 32 | 33 | # Compute path. 34 | label_weights = numpy.random.random(graph.label_size) 35 | weights = pydecode.transform_label_array(graph, label_weights) 36 | path = pydecode.best_path(graph, weights) 37 | 38 | * **High-level algorithms.** Includes a set of widely-used algorithms. :: 39 | 40 | # Inside probabilities. 41 | inside = pydecode.inside(graph, weights, kind=pydecode.LogProb) 42 | 43 | # (Max)-marginals. 44 | marginals = pydecode.marginals(graph, weights) 45 | 46 | # Pruning 47 | mask = marginals > threshold 48 | pruned_graph = pydecode.filter(graph, mask) 49 | 50 | * **Integration with machine learning toolkits.** Train structured models. 
:: 51 | 52 | # Train a discriminative tagger. 53 | perceptron_tagger = StructuredPerceptron(tagger) 54 | perceptron_tagger.fit(X, Y) 55 | Y_test = perceptron_tagger.predict(X_test) 56 | 57 | * **Visualization tools.** IPython integrated tools for debugging and teaching. :: 58 | 59 | pydecode.draw(graph, paths=paths) 60 | 61 | .. image:: _images/hmm.png 62 | :width: 500 px 63 | :align: center 64 | 65 | 66 | .. Documentation, Tutorial and Gallery 67 | .. ---------------------- 68 | 69 | .. .. hlist:: 70 | .. :columns: 2 71 | 72 | .. * documentation_ 73 | .. * tutorial_ 74 | .. * gallery_ 75 | .. * api_ 76 | 77 | 78 | .. Features 79 | .. ------------- 80 | 81 | .. Currently the toolkit is in development. It includes the following features: 82 | 83 | .. * Simple construction of dynamic programs. 84 | .. * Customizable GraphViz output for debugging. 85 | .. * Algorithms for best path, inside scores, outside scores, and oracle scores. 86 | .. * Several types of pruning. 87 | .. * Integration with an (I)LP solver for constrained problems. 88 | .. * Lagrangian Relaxation optimization tools. 89 | .. * Semiring operations over hypergraph structures. 90 | .. * Hooks into PyStruct for structured training. 91 | .. * Fast k-best algorithms. 92 | 93 | 94 | .. .. image:: https://travis-ci.org/srush/PyDecode.png?branch=master 95 | .. :target: https://travis-ci.org/srush/PyDecode 96 | 97 | .. _gallery: http://pydecode.readthedocs.org/en/latest/notebooks/index.html 98 | .. _tutorial: http://pydecode.readthedocs.org/en/latest/notebooks/index.html 99 | .. 
_api: http://pydecode.readthedocs.org/en/latest/api.html 100 | -------------------------------------------------------------------------------- /SConstruct: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | local_libs = {} 4 | for build_mode in ['debug', 'profile', 'opt']: 5 | env = Environment(CXX="g++", ENV=os.environ) 6 | 7 | if build_mode == "debug": 8 | env.Prepend(CCFLAGS =('-g', '-fPIC', '-Wall')) 9 | elif build_mode == "profile": 10 | env.Append(CCFLAGS = ('-O2', '-p', "-ggdb", 11 | "-fprofile-arcs", "-ftest-coverage", 12 | "-fno-strict-aliasing"), 13 | LINKFLAGS = ('-O2', '-p', "-ggdb" , 14 | "-fprofile-arcs", 15 | "-ftest-coverage", 16 | "-fno-strict-aliasing")) 17 | elif build_mode == "opt": 18 | env.Append(CCFLAGS = ('-O2', '-fPIC', 19 | '-Werror', '-Wno-deprecated', 20 | "-fno-strict-aliasing"), 21 | LINKFLAGS = ('-O2', '-fPIC', 22 | "-fno-strict-aliasing")) 23 | 24 | variant = 'build/' + build_mode + "/" 25 | env.VariantDir(variant, '.') 26 | sub_dirs = ['#/' + variant + 'src'] 27 | libs = ('decoding') 28 | env.Append(LIBPATH =('.',) + tuple(sub_dirs)) 29 | cpppath = ('.', tuple(sub_dirs)) 30 | env.Append(CPPPATH=[cpppath]) 31 | env.Append(LIBS=libs) 32 | local_libs[build_mode] = env.SConscript(dirs=sub_dirs, exports=['env']) 33 | 34 | # Run the C++ tests. 35 | env = Environment(ENV=os.environ, CXX="g++") 36 | env.Append(CPPPATH = ["src/"], LIBPATH = ['/usr/lib/', '/usr/local/lib']) 37 | 38 | b = env.Program("build/test", 'src/Tests.cpp', 39 | LIBS = ["pthread", "gtest"] + local_libs["debug"]) 40 | 41 | b2 = env.Command("build/test.out", b, "build/test") 42 | env.Alias("test", b2) 43 | 44 | 45 | # Build the docs. 
46 | # notebooks = env.Command("ignore_note", [], "cd notebooks;make all") 47 | # env.AlwaysBuild(notebooks) 48 | 49 | # doxygen = env.Command("ignore_dox", [], "doxygen Doxyfile") 50 | # env.AlwaysBuild(doxygen) 51 | 52 | docs = env.Command("ignore_docs", [], "cd docs; make html") 53 | env.AlwaysBuild(docs) 54 | 55 | env.Alias("docs", [docs]) 56 | 57 | # Run the python tests. 58 | pytests = env.Command("ignore_test", [], "nosetests python/pydecode/test/") 59 | env.AlwaysBuild(pytests) 60 | 61 | pytests2 = env.Command("ignore_test2", [], "py.test notebooks") 62 | env.AlwaysBuild(pytests) 63 | 64 | env.Alias("pytest", [pytests, pytests2]) 65 | 66 | 67 | 68 | # Building the python library. 69 | 70 | env.Command(["python/pydecode/_pydecode.pyx",], 71 | ["python/pydecode/templates/algorithms.pyx.tpl", 72 | "python/pydecode/templates/algorithms.pxd.tpl", 73 | "python/pydecode/templates/chart.pyx", 74 | "python/pydecode/templates/beam.pyx.tpl", 75 | "python/pydecode/templates/cython_jinja.py"], 76 | "python python/pydecode/templates/cython_jinja.py") 77 | 78 | py_lib = env.Command(["python/pydecode/_pydecode.so"], 79 | ["build/debug/src/libdecoding.a", 80 | "python/pydecode/_pydecode.pyx", 81 | "python/pydecode/templates/libhypergraph.pyx"], 82 | "CC='ccache g++' python setup.py build_ext --inplace --verbose --cython --debug") 83 | env.Alias("pylib", [py_lib]) 84 | -------------------------------------------------------------------------------- /doc-requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.7 2 | numpydoc>=0.5 3 | scipy 4 | sphinx_rtd_theme>=0.1.6 5 | nose>1.0 6 | -------------------------------------------------------------------------------- /docs/build.sh: -------------------------------------------------------------------------------- 1 | cd notebooks/ 2 | ipython nbconvert *.ipynb --to rst 3 | cd doc/ 4 | ipython nbconvert *.ipynb --to rst 5 | cd ../../ 6 | make html 7 | 
-------------------------------------------------------------------------------- /docs/notebooks: -------------------------------------------------------------------------------- 1 | ../notebooks/ -------------------------------------------------------------------------------- /docs/source/_images/BuildingHypergraph_14_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/BuildingHypergraph_14_0.png -------------------------------------------------------------------------------- /docs/source/_images/BuildingHypergraph_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/BuildingHypergraph_7_0.png -------------------------------------------------------------------------------- /docs/source/_images/EditDistance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/EditDistance.png -------------------------------------------------------------------------------- /docs/source/_images/EditDistance_14_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/EditDistance_14_0.png -------------------------------------------------------------------------------- /docs/source/_images/EditDistance_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/EditDistance_7_0.png -------------------------------------------------------------------------------- 
/docs/source/_images/EditDistance_9_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/EditDistance_9_0.png -------------------------------------------------------------------------------- /docs/source/_images/Fibonacci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/Fibonacci.png -------------------------------------------------------------------------------- /docs/source/_images/Fibonacci_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/Fibonacci_5_0.png -------------------------------------------------------------------------------- /docs/source/_images/decipher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/decipher.png -------------------------------------------------------------------------------- /docs/source/_images/decipher_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/decipher_15_0.png -------------------------------------------------------------------------------- /docs/source/_images/decipher_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/decipher_17_0.png -------------------------------------------------------------------------------- 
/docs/source/_images/decipher_28_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/decipher_28_0.png -------------------------------------------------------------------------------- /docs/source/_images/decipher_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/decipher_7_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_11_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_11_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_13_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_13_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_14_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_14_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_15_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_15_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_16_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_16_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_17_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_21_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_21_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_23_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_23_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_24_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_24_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_25_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_25_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_27_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_27_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_28_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_28_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_29_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_29_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_30_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_30_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_31_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_31_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_33_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_33_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_34_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_34_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_35_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_35_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_37_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_37_0.png -------------------------------------------------------------------------------- /docs/source/_images/hmm_40_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hmm_40_0.png -------------------------------------------------------------------------------- /docs/source/_images/hypergraphs_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hypergraphs_4_0.png -------------------------------------------------------------------------------- /docs/source/_images/hypergraphs_5_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/hypergraphs_5_0.png -------------------------------------------------------------------------------- /docs/source/_images/parsing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/parsing.png -------------------------------------------------------------------------------- /docs/source/_images/parsing_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/parsing_12_0.png -------------------------------------------------------------------------------- /docs/source/_images/parsing_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/parsing_4_0.png -------------------------------------------------------------------------------- /docs/source/_images/parsing_9_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/parsing_9_0.png -------------------------------------------------------------------------------- /docs/source/_images/parsing_9_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/parsing_9_1.png -------------------------------------------------------------------------------- /docs/source/_images/tutorial_16_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/tutorial_16_0.png -------------------------------------------------------------------------------- /docs/source/_images/tutorial_20_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/tutorial_20_0.png -------------------------------------------------------------------------------- /docs/source/_images/tutorial_22_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/tutorial_22_0.png -------------------------------------------------------------------------------- /docs/source/_images/tutorial_23_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/tutorial_23_0.png -------------------------------------------------------------------------------- /docs/source/_images/tutorial_27_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/tutorial_27_0.png -------------------------------------------------------------------------------- /docs/source/_images/tutorial_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/tutorial_7_0.png -------------------------------------------------------------------------------- /docs/source/_images/tutorial_9_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/docs/source/_images/tutorial_9_0.png -------------------------------------------------------------------------------- /docs/source/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/source/development.rst: -------------------------------------------------------------------------------- 1 | .. PyDecode documentation master file, created by 2 | sphinx-quickstart on Sat Sep 28 10:03:34 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | Development 8 | ------------- 9 | 10 | .. include:: ../../DEVELOPMENT.rst 11 | -------------------------------------------------------------------------------- /docs/source/examples.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | Examples 3 | =========== 4 | 5 | .. .. toctree:: 6 | .. :maxdepth: 2 7 | 8 | .. notebooks/hypergraphs 9 | -------------------------------------------------------------------------------- /docs/source/full.bib: -------------------------------------------------------------------------------- 1 | @InProceedings{rush2010, 2 | title={{On Dual Decomposition and Linear Programming Relaxations for 3 | Natural Language Processing}}, 4 | author={A.M. Rush and 5 | D. Sontag and 6 | M. Collins and 7 | T. Jaakkola}, 8 | booktitle={Proc. 
EMNLP}, 9 | year={2010} 10 | } 11 | 12 | 13 | @incollection{allauzen2007openfst, 14 | title={OpenFst: A general and efficient weighted finite-state transducer library}, 15 | author={Allauzen, Cyril and Riley, Michael and Schalkwyk, Johan and Skut, Wojciech and Mohri, Mehryar}, 16 | booktitle={Implementation and Application of Automata}, 17 | pages={11--23}, 18 | year={2007}, 19 | publisher={Springer} 20 | } 21 | 22 | @article{goodman1998parsing, 23 | title={Parsing inside-out}, 24 | author={Goodman, Joshua}, 25 | journal={arXiv preprint cmp-lg/9805007}, 26 | year={1998} 27 | } 28 | @article{martin1990, 29 | title={{Polyhedral characterization of discrete dynamic programming}}, 30 | author={Martin, R.K. and Rardin, R.L. and Campbell, B.A.}, 31 | journal={Operations research}, 32 | volume=38, 33 | number=1, 34 | pages={127--138}, 35 | year=1990 36 | } 37 | 38 | 39 | @incollection{klein2005parsing, 40 | title={Parsing and hypergraphs}, 41 | author={Klein, Dan and Manning, Christopher D}, 42 | booktitle={New developments in parsing technology}, 43 | pages={351--372}, 44 | year={2005}, 45 | publisher={Springer} 46 | } 47 | 48 | @incollection{huang2005better, 49 | title={Better k-best parsing}, 50 | author={Huang, Liang and Chiang, David}, 51 | booktitle={Proceedings of the Ninth International Workshop on Parsing Technology}, 52 | pages={53--64}, 53 | year={2005}, 54 | organization={Association for Computational Linguistics} 55 | } 56 | 57 | @inproceedings{eisner1999efficient, 58 | title={Efficient parsing for bilexical context-free grammars and head automaton grammars}, 59 | author={Eisner, Jason and Satta, Giorgio}, 60 | booktitle={Proceedings of the 37th annual meeting of the Association for Computational Linguistics on Computational Linguistics}, 61 | pages={457--464}, 62 | year={1999}, 63 | organization={Association for Computational Linguistics} 64 | } 65 | 66 | @article{younger1967recognition, 67 | title={Recognition and parsing of context-free languages in time
$n^3$}, 68 | author={Younger, Daniel H}, 69 | journal={Information and control}, 70 | volume={10}, 71 | number={2}, 72 | pages={189--208}, 73 | year={1967}, 74 | publisher={Elsevier} 75 | } 76 | 77 | @inproceedings{sarawagi2004semi, 78 | title={Semi-markov conditional random fields for information extraction}, 79 | author={Sarawagi, Sunita and Cohen, William W}, 80 | booktitle={Advances in Neural Information Processing Systems}, 81 | pages={1185--1192}, 82 | year={2004} 83 | } 84 | 85 | @inproceedings{collins02perc, 86 | title = {{Discriminative Training Methods for Hidden Markov Models: Theory and Experiments with Perceptron Algorithms}}, 87 | author = {M. Collins}, 88 | booktitle = {Proc. EMNLP}, 89 | year = {2002}, 90 | pages = {1--8}, 91 | } 92 | -------------------------------------------------------------------------------- /docs/source/gallery.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Gallery 3 | ============ 4 | 5 | .. hlist:: 6 | :columns: 2 7 | 8 | * .. image:: _images/parsing_9_0.png 9 | :width: 500 px 10 | :align: center 11 | 12 | * .. image:: _images/hmm_25_0.png 13 | :width: 500 px 14 | :align: center 15 | 16 | * .. image:: _images/decipher_15_0.png 17 | :width: 500 px 18 | :align: center 19 | 20 | * .. image:: _images/tutorial_22_0.png 21 | :width: 500 px 22 | :align: center 23 | -------------------------------------------------------------------------------- /docs/source/hypergraph.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | Hypergraphs 3 | =========== 4 | 5 | The dynamic 6 | 7 | 8 | Structure 9 | ========== 10 | 11 | Internally, PyDecode uses directed hypergraphs to represent the 12 | structure of dynamic programming algorithms. It also includes 13 | a low-level interface for working with hypergraphs directly. 14 | 15 | .. automodule:: pydecode.potentials 16 | :no-members: 17 | :no-inherited-members: 18 | 19 | .. 
autosummary:: 20 | :toctree: generated/ 21 | :template: class.rst 22 | 23 | Hypergraph 24 | Vertex 25 | Edge 26 | Path 27 | 28 | .. _algorithms: 29 | 30 | Algorithms 31 | ========== 32 | 33 | The toolkit contains a collection of algorithms for working with hypergraphs. 34 | 35 | 36 | .. automodule:: pydecode 37 | :no-members: 38 | :no-inherited-members: 39 | 40 | .. autosummary:: 41 | :toctree: generated/ 42 | 43 | best_path 44 | inside 45 | outside 46 | marginals 47 | project 48 | binarize 49 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. PyDecode documentation master file, created by 2 | sphinx-quickstart on Sat Sep 28 10:03:34 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ------------- 7 | PyDecode 8 | ------------- 9 | 10 | .. include:: ../../README.rst 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | setup 16 | notebooks/index 17 | api 18 | nlp 19 | extensions 20 | -------------------------------------------------------------------------------- /docs/source/nlp.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | NLP 3 | ========== 4 | 5 | Structured Models 6 | ----------------- 7 | ===================== ========= ===================================================== 8 | **StructuredEncoder** |enc| Encode a structured problem as a hypergraph. 9 | ===================== ========= ===================================================== 10 | 11 | .. |enc| replace:: [:doc:`doc`] 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | :hidden: 16 | 17 | notebooks/doc/StructuredEncoder 18 | 19 | 20 | Dynamic Programs 21 | ---------------- 22 | 23 | ================ ========= ===================================================== 24 | **tagger** |tagger| Lattice construction for tagging. 
25 | **semimarkov** |semi| Semi-markov tagging algorithm. 26 | **eisner** |eisner| Eisner's algorithm for dependency parsing. 27 | **cfg** |cfg| Parsing algorithm for Chomsky normal form grammar. 28 | ================ ========= ===================================================== 29 | 30 | .. |eisner| replace:: [:doc:`doc`] 31 | .. |tagger| replace:: [:doc:`doc`] 32 | .. |semi| replace:: [:doc:`doc`] 33 | .. |cfg| replace:: [:doc:`doc`] 34 | 35 | .. toctree:: 36 | :maxdepth: 2 37 | :hidden: 38 | 39 | notebooks/doc/tagger 40 | notebooks/doc/semimarkov 41 | notebooks/doc/eisner 42 | notebooks/doc/cfg 43 | 44 | 45 | Training 46 | -------- 47 | 48 | ================ ========= ===================================================== 49 | **DPModel** |dpm| Lattice construction for tagging. 50 | ================ ========= ===================================================== 51 | 52 | .. |dpm| replace:: [:doc:`doc`] 53 | -------------------------------------------------------------------------------- /docs/source/notebooks: -------------------------------------------------------------------------------- 1 | ../notebooks/ -------------------------------------------------------------------------------- /docs/source/setup.rst: -------------------------------------------------------------------------------- 1 | --------------- 2 | Setup 3 | --------------- 4 | 5 | Installation 6 | ===================== 7 | 8 | 9 | The easiest way to install PyDecode is through pip. :: 10 | 11 | $ pip install pydecode 12 | 13 | 14 | The base functionality of the library requires numpy_, scipy_ and Boost_. To install Boost on Debian/Ubuntu run:: 15 | 16 | $ sudo apt-get install libboost-dev 17 | 18 | 19 | Optional Dependencies 20 | ===================== 21 | 22 | PyDecode also includes extensions that integrate with other Python libraries to provide additional functionality. 23 | 24 | * **NetworkX, PyGraphviz, IPython** (:ref:`display`) 25 | 26 | Provides methods for model visualization. 
27 | 28 | * **PyStruct** (:ref:`structured`) 29 | 30 | Provides methods for training the parameters of a model. 31 | 32 | * **PuLP and an LP solver** (:ref:`lp`) 33 | 34 | Provides methods for solving models using general-purpose 35 | linear-programming solvers. 36 | 37 | 38 | 39 | Running Notebooks 40 | ================= 41 | 42 | In addition to this documentation, the distribution also includes a set 43 | of example tutorials written as IPython_ notebooks. 44 | 45 | These notebooks can be run locally after installation. Assuming ENV is 46 | the base install directory (for instance using virtualenv_), the 47 | notebooks can be run using:: 48 | 49 | $ ipython notebook ENV/pydecode/notebooks/ 50 | 51 | 52 | .. _numpy: http://numpy.org 53 | .. _scipy: http://scipy.org 54 | .. _Boost: http://boost.org 55 | .. _IPython: http://ipython.org 56 | .. _virtualenv: http://virtualenv.readthedocs.org/en/latest/ 57 | -------------------------------------------------------------------------------- /notebooks/BeamSearch.rst: -------------------------------------------------------------------------------- 1 | 2 | .. code:: python 3 | 4 | import pydecode.test.utils as utils 5 | import pydecode.hyper as ph 6 | .. code:: python 7 | 8 | hyper = utils.simple_hypergraph() 9 | w = ph.LogViterbiPotentials(hyper).from_vector([0.5 for edge in hyper.edges]) 10 | .. code:: python 11 | 12 | ph.best_path(hyper, w) 13 | 14 | 15 | 16 | .. parsed-literal:: 17 | 18 | 19 | 20 | 21 | 22 | .. code:: python 23 | 24 | import pydecode.display as display 25 | .. code:: python 26 | 27 | def build_constraints(l): 28 | b = ph.Bitset() 29 | if l == "0": 30 | b[1] = 1 31 | if l == "1": 32 | b[0] = 1 33 | b[2] = 1 34 | if l == "2": 35 | b[0] = 1 36 | if l == "3": 37 | b[1] = 1 38 | if l == "4": 39 | b[2] = 1 40 | print "".join(str(1 if b[i] else 0) for i in range(5)) 41 | return b 42 | cons = ph.BinaryVectorPotentials(hyper).from_vector([build_constraints(edge.label) for edge in hyper.edges]) 43 | 44 | .. 
parsed-literal:: 45 | 46 | 01000 47 | 10100 48 | 10000 49 | 01000 50 | 00100 51 | 52 | 53 | .. code:: python 54 | 55 | display.HypergraphFormatter(hyper).to_ipython() 56 | 57 | 58 | 59 | .. image:: BeamSearch_files/BeamSearch_5_0.png 60 | 61 | 62 | 63 | .. code:: python 64 | 65 | ins = ph.inside(hyper, w) 66 | out = ph.outside(hyper, w, ins) 67 | .. code:: python 68 | 69 | beam_chart = ph.beam_search_BinaryVector(hyper, w, cons, out, -10000, [0,0,0,0,1,2],[10], 3) 70 | .. code:: python 71 | 72 | for node in hyper.nodes: 73 | print node.label 74 | for (hyp, score, future) in beam_chart[node]: 75 | print "\t", score, 76 | print "".join(str(1 if hyp[i] else 0) for i in range(5)) 77 | 78 | print 79 | 80 | .. parsed-literal:: 81 | 82 | start 0 83 | 0.0 00000 84 | 85 | start 1 86 | 0.0 00000 87 | 88 | start 2 89 | 0.0 00000 90 | 91 | start 3 92 | 0.0 00000 93 | 94 | mid 95 | 0.5 10100 96 | 0.5 01000 97 | 98 | top 99 | 1.0 01100 100 | 1.0 11100 101 | 1.0 11000 102 | 103 | 104 | 105 | .. code:: python 106 | 107 | #display.HypergraphPathFormatter(hyper, [beam_chart.path(1)]).to_ipython() -------------------------------------------------------------------------------- /notebooks/BeamSearch_files/BeamSearch_2_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/BeamSearch_files/BeamSearch_4_0.text: -------------------------------------------------------------------------------- 1 | 01000 2 | 10100 3 | 10000 4 | 01000 5 | 00100 6 | -------------------------------------------------------------------------------- /notebooks/BeamSearch_files/BeamSearch_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/BeamSearch_files/BeamSearch_5_0.png 
-------------------------------------------------------------------------------- /notebooks/BeamSearch_files/BeamSearch_5_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/BeamSearch_files/BeamSearch_8_0.text: -------------------------------------------------------------------------------- 1 | start 0 2 | 0.0 00000 3 | 4 | start 1 5 | 0.0 00000 6 | 7 | start 2 8 | 0.0 00000 9 | 10 | start 3 11 | 0.0 00000 12 | 13 | mid 14 | 0.5 10100 15 | 0.5 01000 16 | 17 | top 18 | 1.0 01100 19 | 1.0 11100 20 | 1.0 11000 21 | 22 | -------------------------------------------------------------------------------- /notebooks/BuildingHypergraph.rst: -------------------------------------------------------------------------------- 1 | 2 | Hypergraph Interface 3 | ==================== 4 | 5 | 6 | .. code:: python 7 | 8 | import pydecode.hyper as ph 9 | .. code:: python 10 | 11 | hyper1 = ph.Hypergraph() 12 | The code assumes that the hypergraph is immutable. The Python interface 13 | enforces this by using a builder pattern. The important function to 14 | remember is add\_node. 15 | 16 | - If there are no arguments, then a terminal node is created. Terminal 17 | nodes must be created first. 18 | - If it is given an iterable, it creates hyperedges to the new node. 19 | Each element in the iterable is a pair 20 | - A list of tail nodes for that edge. 21 | - A label for that edge. 22 | 23 | 24 | .. 
code:: python 25 | 26 | with hyper1.builder() as b: 27 | node_a = b.add_node(label = "a") 28 | node_b = b.add_node(label = "b") 29 | node_c = b.add_node(label = "c") 30 | node_d = b.add_node(label = "d") 31 | node_e = b.add_node([([node_b, node_c], "First Edge")], label = "e") 32 | b.add_node([([node_a, node_e], "Second Edge"), 33 | ([node_a, node_d], "Third Edge")], label = "f") 34 | Outside of the ``with`` block the hypergraph is considered finished and 35 | no new nodes can be added. 36 | 37 | We can also display the hypergraph to see our work. 38 | 39 | .. code:: python 40 | 41 | import pydecode.display as display 42 | display.HypergraphFormatter(hyper1).to_ipython() 43 | 44 | 45 | 46 | .. image:: BuildingHypergraph_files/BuildingHypergraph_7_0.png 47 | 48 | 49 | 50 | After creating the hypergraph we can assign additional property 51 | information. One useful property is to add potentials. We do this by 52 | defining a function to map labels to potentials. 53 | 54 | .. code:: python 55 | 56 | def build_potentials(label): 57 | if "First" in label: return 1 58 | if "Second" in label: return 5 59 | if "Third" in label: return 5 60 | return 0 61 | potentials = ph.LogViterbiPotentials(hyper1).from_vector((build_potentials(edge.label) 62 | for edge in hyper1.edges)) 63 | .. code:: python 64 | 65 | for edge in hyper1.edges: 66 | print edge.label, potentials[edge] 67 | 68 | .. parsed-literal:: 69 | 70 | First Edge 1.0 71 | Second Edge 5.0 72 | Third Edge 5.0 73 | 74 | 75 | We use the best path. 76 | 77 | .. code:: python 78 | 79 | path = ph.best_path(hyper1, potentials) 80 | .. code:: python 81 | 82 | print potentials.dot(path) 83 | 84 | .. parsed-literal:: 85 | 86 | 6.0 87 | 88 | 89 | .. code:: python 90 | 91 | display.HypergraphFormatter(hyper1).to_ipython() 92 | 93 | 94 | 95 | .. 
image:: BuildingHypergraph_files/BuildingHypergraph_14_0.png 96 | 97 | 98 | -------------------------------------------------------------------------------- /notebooks/BuildingHypergraph_files/BuildingHypergraph_10_0.text: -------------------------------------------------------------------------------- 1 | First Edge 1.0 2 | Second Edge 5.0 3 | Third Edge 5.0 4 | -------------------------------------------------------------------------------- /notebooks/BuildingHypergraph_files/BuildingHypergraph_13_0.text: -------------------------------------------------------------------------------- 1 | 6.0 2 | -------------------------------------------------------------------------------- /notebooks/BuildingHypergraph_files/BuildingHypergraph_14_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/BuildingHypergraph_files/BuildingHypergraph_14_0.png -------------------------------------------------------------------------------- /notebooks/BuildingHypergraph_files/BuildingHypergraph_14_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/BuildingHypergraph_files/BuildingHypergraph_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/BuildingHypergraph_files/BuildingHypergraph_7_0.png -------------------------------------------------------------------------------- /notebooks/BuildingHypergraph_files/BuildingHypergraph_7_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/Debugging.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "code", 12 | "collapsed": false, 13 | "input": [ 14 | "# c = chart.ChartBuilder(lambda a:a, chart.HypergraphSemiRing, \n", 15 | "# build_hypergraph=True, debug=True)\n", 16 | "# hypergraph = edit_distance(c, (\"aab\", \"bbb\")).finish()" 17 | ], 18 | "language": "python", 19 | "metadata": {}, 20 | "outputs": [] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "collapsed": false, 25 | "input": [ 26 | "%%latex\n", 27 | "$C_10$" 28 | ], 29 | "language": "python", 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "latex": [ 34 | "$C_10$" 35 | ], 36 | "metadata": {}, 37 | "output_type": "display_data", 38 | "text": [ 39 | "" 40 | ] 41 | } 42 | ], 43 | "prompt_number": 2 44 | }, 45 | { 46 | "cell_type": "code", 47 | "collapsed": false, 48 | "input": [], 49 | "language": "python", 50 | "metadata": {}, 51 | "outputs": [] 52 | } 53 | ], 54 | "metadata": {} 55 | } 56 | ] 57 | } -------------------------------------------------------------------------------- /notebooks/Debugging.rst: -------------------------------------------------------------------------------- 1 | 2 | .. code:: python 3 | 4 | # c = chart.ChartBuilder(lambda a:a, chart.HypergraphSemiRing, 5 | # build_hypergraph=True, debug=True) 6 | # hypergraph = edit_distance(c, ("aab", "bbb")).finish() 7 | .. code:: python 8 | 9 | %%latex 10 | $C_10$ 11 | 12 | 13 | .. math:: 14 | 15 | C_10 16 | 17 | 18 | .. 
code:: python 19 | 20 | -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_10_0.text: -------------------------------------------------------------------------------- 1 | array([[-1, -1, -1, -1, -1, -1], 2 | [-1, 7, 8, 9, 10, 11], 3 | [-1, 13, 14, 15, 16, 17], 4 | [-1, 19, 20, 21, 22, 23], 5 | [-1, 25, 26, 27, 28, 29], 6 | [-1, 31, 32, 33, 34, 35]]) -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_11_0.text: -------------------------------------------------------------------------------- 1 | array([[ 0, 1, 2, 3, 4, 5], 2 | [ 6, 7, 8, 9, 10, 11], 3 | [12, 13, 14, 15, 16, 17], 4 | [18, 19, 20, 21, 22, 23], 5 | [24, 25, 26, 27, 28, 29], 6 | [30, 31, 32, 33, 34, 35]]) -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_12_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_12_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_13_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_13_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_13_0.text: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_15_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_15_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_16_0.text: -------------------------------------------------------------------------------- 1 | array([[4, 4, 2], 2 | [3, 3, 2], 3 | [2, 3, 0], 4 | [2, 2, 1], 5 | [1, 1, 2]]) -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_17_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_17_0.text: -------------------------------------------------------------------------------- 1 | array([[4, 4, 2], 2 | [3, 3, 2], 3 | [2, 3, 0], 4 | [2, 2, 1], 5 | [1, 1, 2]]) -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_18_0.png -------------------------------------------------------------------------------- 
/notebooks/EditDistance_files/EditDistance_18_0.text: -------------------------------------------------------------------------------- 1 | array([[0, 0], 2 | [1, 1], 3 | [2, 2]]) -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_19_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_19_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_19_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_20_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_20_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_20_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_21_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_21_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_21_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/notebooks/EditDistance_files/EditDistance_22_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_22_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_22_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_23_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_23_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_23_0.text: -------------------------------------------------------------------------------- 1 | 22 -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_24_0.text: -------------------------------------------------------------------------------- 1 | array([ 4, 9, 2, 7, 12, 5, 13, 18, 11, 16, 21, 14], dtype=int32) -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_25_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_25_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_25_0.text: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_26_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_26_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_26_0.text: -------------------------------------------------------------------------------- 1 | [[ 2.00000000e+00 1.00000000e+00 -1.00000000e+09] 2 | [ 1.00000000e+00 2.00000000e+00 -1.00000000e+09] 3 | [ -1.00000000e+09 -1.00000000e+09 -1.00000000e+09]] 4 | -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_27_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_27_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_27_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_29_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/EditDistance_files/EditDistance_29_0.png -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_29_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/notebooks/EditDistance_files/EditDistance_6_0.text: -------------------------------------------------------------------------------- 1 | array([[ 0, 1, 2, 3, 4], 2 | [ 5, 6, 7, 8, 9], 3 | [10, 11, 12, 13, 14], 4 | [15, 16, 17, 18, 19], 5 | [20, 21, 22, 23, 24]]) -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_8_0.text: -------------------------------------------------------------------------------- 1 | array([[0, 1], 2 | [0, 2], 3 | [0, 3], 4 | [0, 4]]) -------------------------------------------------------------------------------- /notebooks/EditDistance_files/EditDistance_9_0.text: -------------------------------------------------------------------------------- 1 | array([[-1, -1, -1, -1, -1, -1], 2 | [-1, 7, 8, 9, 10, 11], 3 | [-1, 13, 14, 15, 16, 17], 4 | [-1, 19, 20, 21, 22, 23], 5 | [-1, 25, 26, 27, 28, 29], 6 | [-1, 31, 32, 33, 34, 35]]) -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_10_0.text: -------------------------------------------------------------------------------- 1 | array([ 0., 1., 2., 4., 7., 12., 20., 33., 54., 88.]) -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/Fibonacci_files/Fibonacci_12_0.png -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_12_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_14_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/Fibonacci_files/Fibonacci_14_0.png -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_14_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/Fibonacci_files/Fibonacci_15_0.png -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_15_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_4_0.text: -------------------------------------------------------------------------------- 1 | [1, 1, 2, 3, 5, 8, 13, 21, 34] 2 | -------------------------------------------------------------------------------- /notebooks/Fibonacci_files/Fibonacci_5_0.text: -------------------------------------------------------------------------------- 1 | array([ 0., 1., 1., 2., 3., 5., 8., 13., 21., 34.]) -------------------------------------------------------------------------------- /notebooks/Lattice_files/Lattice_23_0.text: -------------------------------------------------------------------------------- 1 | 12.6431069308 2 | (0, 0) 3 | (1, 9) 4 | (2, 10) 5 | (3, 4) 6 | (4, 7) 7 | (5, 12) 8 | (6, 2) 9 | (7, 8) 10 | (8, 6) 11 | (9, 3) 12 | (10, 5) 13 | (11, 11) 14 | (12, 1) 15 | (13, 0) 16 | (14, 13) 17 | -------------------------------------------------------------------------------- /notebooks/Lattice_files/Lattice_3_0.text: 
-------------------------------------------------------------------------------- 1 | [[0, 1, 2], [0, 1, 2], [0, 1, 2]] 2 | -------------------------------------------------------------------------------- /notebooks/Lattice_files/Lattice_6_0.text: -------------------------------------------------------------------------------- 1 | 0 0 2 | 1 0 3 | 1 1 4 | 1 2 5 | 1 0 6 | 1 1 7 | 1 2 8 | 2 0 9 | 2 1 10 | 2 2 11 | 3 0 12 | 3 1 13 | 3 2 14 | 4 0 15 | -------------------------------------------------------------------------------- /notebooks/PerceptronTagger_files/PerceptronTagger_11_1.text: -------------------------------------------------------------------------------- 1 | StructuredPerceptron(average=False, batch=False, decay_exponent=0, 2 | decay_t0=10, logger=None, max_iter=5, 3 | model=BetterBigramTagger, size_joint_feature: 330, n_jobs=1, 4 | verbose=1) -------------------------------------------------------------------------------- /notebooks/PerceptronTagger_files/PerceptronTagger_17_0.text: -------------------------------------------------------------------------------- 1 | [[0], [0], [1], [0]] -------------------------------------------------------------------------------- /notebooks/PerceptronTagger_files/PerceptronTagger_1_0.text: -------------------------------------------------------------------------------- 1 | 33 -------------------------------------------------------------------------------- /notebooks/PerceptronTagger_files/PerceptronTagger_4_0.text: -------------------------------------------------------------------------------- 1 | [(0, 2, 0), (1, 0, 2), (2, 2, 0)] 2 | [(0, 0, 0), (1, 0, 0), (2, 0, 0)] 3 | -------------------------------------------------------------------------------- /notebooks/PerceptronTagger_files/PerceptronTagger_6_0.text: -------------------------------------------------------------------------------- 1 | ([['START', 'the', 'dog', 'walked', 'END'], 2 | ['START', 'in', 'the', 'park', 'END'], 3 | ['START', 'in', 'the', 'dog', 
'END']], 4 | [['START', 'D', 'N', 'V', 'END'], 5 | ['START', 'I', 'D', 'N', 'END'], 6 | ['START', 'I', 'D', 'N', 'END']]) -------------------------------------------------------------------------------- /notebooks/PerceptronTagger_files/PerceptronTagger_7_0.text: -------------------------------------------------------------------------------- 1 | iteration 0 2 | ['START', 'D', 'N', 'V', 'END'] ['START' 'N' 'N' 'N' 'END'] 3 | ['START', 'I', 'D', 'N', 'END'] ['START' 'D' 'V' 'V' 'END'] 4 | ['START', 'I', 'D', 'N', 'END'] ['START' 'I' 'D' 'N' 'END'] 5 | avg loss: 0.066667 w: [ 1. 0. 1. -1. 0. -1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 6 | 0. 0. 0. 0. 1. 0. 2. 0. 0. -2. -1. 0. 0. 0. 0. 0. 0. 0. 7 | -1. 0. 0. 1. 0. -1.] 8 | effective learning rate: 1.000000 9 | iteration 1 10 | ['START', 'D', 'N', 'V', 'END'] ['START' 'I' 'D' 'N' 'END'] 11 | ['START', 'I', 'D', 'N', 'END'] ['START' 'D' 'N' 'V' 'END'] 12 | ['START', 'I', 'D', 'N', 'END'] ['START' 'I' 'D' 'N' 'END'] 13 | avg loss: 0.080000 w: [ 1. 0. 1. -1. 0. -1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 14 | 0. 0. 0. 0. 1. 0. 2. 0. 0. -2. -1. 0. 0. 0. 0. 0. 0. 0. 15 | -1. 0. 0. 1. 0. -1.] 16 | effective learning rate: 1.000000 17 | iteration 2 18 | ['START', 'D', 'N', 'V', 'END'] ['START' 'I' 'D' 'N' 'END'] 19 | ['START', 'I', 'D', 'N', 'END'] ['START' 'D' 'N' 'V' 'END'] 20 | ['START', 'I', 'D', 'N', 'END'] ['START' 'I' 'D' 'N' 'END'] 21 | avg loss: 0.080000 w: [ 1. 0. 1. -1. 0. -1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 22 | 0. 0. 0. 0. 1. 0. 2. 0. 0. -2. -1. 0. 0. 0. 0. 0. 0. 0. 23 | -1. 0. 0. 1. 0. -1.] 
24 | effective learning rate: 1.000000 25 | -------------------------------------------------------------------------------- /notebooks/PerceptronTagger_files/PerceptronTagger_7_1.text: -------------------------------------------------------------------------------- 1 | StructuredPerceptron(average=False, batch=False, decay_exponent=0, 2 | decay_t0=10, logger=None, max_iter=3, 3 | model=BigramTagger, size_joint_feature: 42, n_jobs=1, verbose=1) -------------------------------------------------------------------------------- /notebooks/PerceptronTagger_files/PerceptronTagger_9_0.text: -------------------------------------------------------------------------------- 1 | [array([2, 1, 0]), 2 | array([2, 1, 0]), 3 | array([2, 1, 0]), 4 | array([2, 1, 0]), 5 | array([2, 0, 1]), 6 | array([0, 1, 2]), 7 | array([0, 1, 2])] -------------------------------------------------------------------------------- /notebooks/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os,sys 3 | 4 | wrapped_stdin = sys.stdin 5 | sys.stdin = sys.__stdin__ 6 | from IPython.kernel import KernelManager 7 | sys.stdin = wrapped_stdin 8 | 9 | from IPython.nbformat.current import reads 10 | 11 | # combined from 12 | # http://pytest.org/latest/example/nonpython.html#non-python-tests 13 | # and 14 | # https://gist.github.com/2621679 by minrk 15 | 16 | tests = ["Fibonacci", "EditDistance", "hmm", "parsing", "PerceptronTagger"] 17 | 18 | def pytest_collect_file(path, parent): 19 | print path 20 | if path.ext == ".ipynb" and any([t + "." 
in str(path) for t in tests]): 21 | return IPyNbFile(path, parent) 22 | 23 | class IPyNbFile(pytest.File): 24 | def collect(self): 25 | with self.fspath.open() as f: 26 | self.nb = reads(f.read(), 'json') 27 | 28 | cell_num = 0 29 | 30 | for ws in self.nb.worksheets: 31 | for cell in ws.cells: 32 | if cell.cell_type == "code": 33 | yield IPyNbCell(self.name, self, cell_num, cell) 34 | cell_num += 1 35 | 36 | def setup(self): 37 | self.km = KernelManager() 38 | self.km.start_kernel(stderr=open(os.devnull, 'w')) 39 | self.kc = self.km.client() 40 | self.kc.start_channels() 41 | self.shell = self.kc.shell_channel 42 | 43 | def teardown(self): 44 | self.km.shutdown_kernel() 45 | del self.shell 46 | del self.km 47 | 48 | class IPyNbCell(pytest.Item): 49 | def __init__(self, name, parent, cell_num, cell): 50 | super(IPyNbCell, self).__init__(name, parent) 51 | 52 | self.cell_num = cell_num 53 | self.cell = cell 54 | 55 | def runtest(self): 56 | print "running" 57 | shell = self.parent.shell 58 | shell.execute(self.cell.input, allow_stdin=False) 59 | reply = shell.get_msg(timeout=20)['content'] 60 | if reply['status'] == 'error': 61 | raise IPyNbException(self.cell_num, self.cell.input, '\n'.join(reply['traceback'])) 62 | 63 | def repr_failure(self, excinfo): 64 | """ called when self.runtest() raises an exception. """ 65 | if isinstance(excinfo.value, IPyNbException): 66 | return "\n".join([ 67 | "notebook worksheet execution failed", 68 | " cell %s\n\n" 69 | " input: %s\n\n" 70 | " raised: %s\n" % excinfo.value.args[0:3], 71 | ]) 72 | 73 | 74 | def reportinfo(self): 75 | return self.fspath, 0, "cell %d" % self.cell_num 76 | 77 | class IPyNbException(Exception): 78 | """ custom exception for error reporting. 
""" 79 | -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_12_0.text: -------------------------------------------------------------------------------- 1 | 0.0764546410063 2 | 0.627081863809 3 | 0.0661244842873 4 | 0.311587044669 5 | 0.836643013106 6 | 0.0962409740027 7 | 0.0366856407542 8 | 0.608319172251 9 | 0.0415800024809 10 | 0.190480334343 11 | 0.44535181756 12 | 0.365934762998 13 | 0.619941764689 14 | 0.358337453464 15 | 0.0 16 | 0.0 17 | 0.0 18 | 0.0 19 | 0.0 20 | 0.0 21 | 0.0 22 | 0.0 23 | 0.0 24 | 0.0 25 | 0.0 26 | 0.0 27 | 0.0 28 | 0.0 29 | 0.0 30 | 0.0 31 | 0.0 32 | 0.0 33 | 0.0 34 | -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_13_0.text: -------------------------------------------------------------------------------- 1 | 2.083666641604527 -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/decipher_files/decipher_15_0.png -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_15_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_16_0.text: -------------------------------------------------------------------------------- 1 | 1.46372487692 2 | -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_16_1.text: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- 
/notebooks/decipher_files/decipher_21_0.text: -------------------------------------------------------------------------------- 1 | 4650 2 | -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_22_0.text: -------------------------------------------------------------------------------- 1 | 11 2 | -2.07941654387 3 | 221 4 | 0.0 5 | 298 6 | 0.0 7 | 648 8 | -1.09861228867 9 | 702 10 | -0.405481773803 11 | 709 12 | -1.45088787965 13 | 814 14 | -0.510852289188 15 | 951 16 | -0.69314718056 17 | 971 18 | -2.07941654387 19 | 1181 20 | 0.0 21 | 1258 22 | 0.0 23 | 1428 24 | -1.09861228867 25 | 1451 26 | -2.07941654387 27 | 1661 28 | 0.0 29 | 1738 30 | 0.0 31 | 1908 32 | -0.693234675638 33 | 2190 34 | -0.693172179622 35 | 2449 36 | -0.510852289188 37 | 2586 38 | -0.69314718056 39 | 2865 40 | -0.693172179622 41 | 3124 42 | -0.510852289188 43 | 3261 44 | -0.69314718056 45 | 3281 46 | -2.07941654387 47 | 3491 48 | 0.0 49 | 3568 50 | 0.0 51 | 3888 52 | -1.09861228867 53 | 3970 54 | -0.693234675638 55 | 4245 56 | -0.693172179622 57 | 4504 58 | -0.510852289188 59 | 4641 60 | -0.69314718056 61 | -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_22_1.text: -------------------------------------------------------------------------------- 1 | -21.751856464057795 -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/decipher_files/decipher_7_0.png -------------------------------------------------------------------------------- /notebooks/decipher_files/decipher_7_0.text: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.ChartBuilder 3 | ===================== 4 | 5 | 6 | .. note:: 7 | This section gives a formal overview of the use of ``ChartBuilder``. For a series of tutorials and practical examples see :doc:`../index`. 8 | 9 | 10 | 11 | .. currentmodule:: pydecode 12 | .. autoclass:: ChartBuilder 13 | :members: init, set 14 | 15 | 16 | 17 | Examples 18 | -------- 19 | 20 | 21 | .. code:: python 22 | 23 | import pydecode 24 | import numpy as np 25 | items = np.arange(10) 26 | .. code:: python 27 | 28 | chart = pydecode.ChartBuilder(items) 29 | chart.init(items[0]) 30 | chart.set(items[1], 31 | [[items[0]]], 32 | labels=[0]) 33 | pydecode.draw(chart.finish(), labels=True) 34 | 35 | 36 | 37 | .. image:: ChartBuilder_files/ChartBuilder_7_0.png 38 | 39 | 40 | 41 | Computes 42 | 43 | .. math:: f(w) = (\bar{1} \otimes w(0)) \oplus (\bar{1} \otimes w(1)) \oplus (\bar{1} \otimes w(2)) 44 | 45 | 46 | 47 | .. code:: python 48 | 49 | chart = pydecode.ChartBuilder(items) 50 | chart.init(items[0:3]) 51 | chart.set(items[3], 52 | [[items[0]], [items[1]], [items[2]]], 53 | labels=[0,1,2]) 54 | pydecode.draw(chart.finish(), labels=True) 55 | 56 | 57 | 58 | .. image:: ChartBuilder_files/ChartBuilder_9_0.png 59 | 60 | 61 | 62 | Represents 63 | 64 | .. math:: f(w) = (\bar{1} \otimes \bar{1} \otimes \bar{1} \otimes w(0)) 65 | 66 | 67 | 68 | .. code:: python 69 | 70 | chart = pydecode.ChartBuilder(items) 71 | chart.init(items[0:3]) 72 | chart.set(items[3], 73 | [[items[0], items[1], items[2]]], 74 | labels=[0]) 75 | pydecode.draw(chart.finish(), labels=True) 76 | 77 | 78 | 79 | .. image:: ChartBuilder_files/ChartBuilder_11_0.png 80 | 81 | 82 | 83 | Same functions using ``set_t``. 84 | 85 | .. 
code:: python 86 | 87 | chart = pydecode.ChartBuilder(items) 88 | chart.init(items[0:3]) 89 | chart.set_t(items[3], items[0:3], labels=np.arange(3)) 90 | pydecode.draw(chart.finish(), labels=True) 91 | 92 | 93 | 94 | .. image:: ChartBuilder_files/ChartBuilder_13_0.png 95 | 96 | 97 | 98 | .. code:: python 99 | 100 | chart = pydecode.ChartBuilder(items) 101 | chart.init(items[0:3]) 102 | chart.set_t(items[3], items[[0]], items[[1]], items[[2]], 103 | labels=np.array([0])) 104 | pydecode.draw(chart.finish(), labels=True) 105 | 106 | 107 | 108 | .. image:: ChartBuilder_files/ChartBuilder_14_0.png 109 | 110 | 111 | 112 | A more interesting use of ``set_t``. 113 | 114 | .. code:: python 115 | 116 | chart = pydecode.ChartBuilder(items) 117 | chart.init(items[0:9]) 118 | chart.set_t(items[9], items[0:8], items[1:9], labels=np.arange(8)) 119 | pydecode.draw(chart.finish(), labels=True) 120 | 121 | 122 | 123 | .. image:: ChartBuilder_files/ChartBuilder_16_0.png 124 | 125 | 126 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_10_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_10_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_11_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_11_0.png 
-------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_11_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_12_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_12_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_13_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_13_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_13_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_14_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_14_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_14_0.text: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_15_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_15_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_16_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_16_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_16_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_6_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_6_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_7_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_7_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_7_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_8_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_8_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_9_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/ChartBuilder_files/ChartBuilder_9_0.png -------------------------------------------------------------------------------- /notebooks/doc/ChartBuilder_files/ChartBuilder_9_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/Hypergraph.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "heading", 12 | "level": 1, 13 | "source": [ 14 | "pydecode.Hypergraph" 15 | ] 16 | }, 17 | { 18 | "cell_type": 
"raw", 19 | "source": [ 20 | ".. note::\n", 21 | " This section gives a formal overview of the use of ``Hypergraph``. For a series of tutorials and practical examples see :doc:`../index`." 22 | ] 23 | }, 24 | { 25 | "cell_type": "raw", 26 | "source": [ 27 | ".. currentmodule:: pydecode \n", 28 | ".. autoclass:: Hypergraph" 29 | ] 30 | }, 31 | { 32 | "cell_type": "raw", 33 | "source": [ 34 | ".. currentmodule:: pydecode \n", 35 | ".. autoclass:: Vertex" 36 | ] 37 | }, 38 | { 39 | "cell_type": "raw", 40 | "source": [ 41 | ".. currentmodule:: pydecode \n", 42 | ".. autoclass:: Edge" 43 | ] 44 | }, 45 | { 46 | "cell_type": "raw", 47 | "source": [ 48 | ".. currentmodule:: pydecode \n", 49 | ".. autoclass:: Path" 50 | ] 51 | }, 52 | { 53 | "cell_type": "heading", 54 | "level": 2, 55 | "source": [ 56 | "Examples" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "collapsed": false, 62 | "input": [ 63 | "import pydecode\n", 64 | "import pydecode.test.utils\n", 65 | "hypergraph = pydecode.test.utils.simple_hypergraph()" 66 | ], 67 | "language": "python", 68 | "outputs": [], 69 | "prompt_number": 5 70 | }, 71 | { 72 | "cell_type": "code", 73 | "collapsed": false, 74 | "input": [ 75 | "for vertex in hypergraph.vertices:\n", 76 | " print vertex.id, vertex.is_terminal\n", 77 | " for edge in vertex.edges:\n", 78 | " print \"\\t\", edge.id, edge.label" 79 | ], 80 | "language": "python", 81 | "outputs": [ 82 | { 83 | "output_type": "stream", 84 | "stream": "stdout", 85 | "text": [ 86 | "0 True\n", 87 | "1 True\n", 88 | "2 True\n", 89 | "3 True\n", 90 | "4 False\n", 91 | "\t0 -1\n", 92 | "\t1 -1\n", 93 | "5 False\n", 94 | "\t2 -1\n" 95 | ] 96 | } 97 | ], 98 | "prompt_number": 11 99 | }, 100 | { 101 | "cell_type": "heading", 102 | "level": 2, 103 | "source": [ 104 | "Bibliography" 105 | ] 106 | }, 107 | { 108 | "cell_type": "raw", 109 | "source": [ 110 | ".. 
bibliography:: ../../full.bib \n", 111 | " :filter: key in {\"martin1990\",\"klein2005parsing\"}\n", 112 | " :style: plain" 113 | ] 114 | } 115 | ] 116 | } 117 | ] 118 | } -------------------------------------------------------------------------------- /notebooks/doc/Hypergraph.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.Hypergraph 3 | =================== 4 | 5 | 6 | .. note:: 7 | This section gives a formal overview of the use of ``Hypergraph``. For a series of tutorials and practical examples see :doc:`../index`. 8 | 9 | .. currentmodule:: pydecode 10 | .. autoclass:: Hypergraph 11 | 12 | .. currentmodule:: pydecode 13 | .. autoclass:: Vertex 14 | 15 | .. currentmodule:: pydecode 16 | .. autoclass:: Edge 17 | 18 | .. currentmodule:: pydecode 19 | .. autoclass:: Path 20 | 21 | Examples 22 | -------- 23 | 24 | 25 | .. code:: python 26 | 27 | import pydecode 28 | import pydecode.test.utils 29 | hypergraph = pydecode.test.utils.simple_hypergraph() 30 | .. code:: python 31 | 32 | for vertex in hypergraph.vertices: 33 | print vertex.id, vertex.is_terminal 34 | for edge in vertex.edges: 35 | print "\t", edge.id, edge.label 36 | 37 | .. parsed-literal:: 38 | 39 | 0 True 40 | 1 True 41 | 2 True 42 | 3 True 43 | 4 False 44 | 0 -1 45 | 1 -1 46 | 5 False 47 | 2 -1 48 | 49 | 50 | Bibliography 51 | ------------ 52 | 53 | 54 | .. 
bibliography:: ../../full.bib 55 | :filter: key in {"martin1990","klein2005parsing"} 56 | :style: plain 57 | -------------------------------------------------------------------------------- /notebooks/doc/Hypergraph_files/Hypergraph_8_0.text: -------------------------------------------------------------------------------- 1 | 0 True 2 | 1 True 3 | 2 True 4 | 3 True 5 | 4 False 6 | 0 -1 7 | 1 -1 8 | 5 False 9 | 2 -1 10 | -------------------------------------------------------------------------------- /notebooks/doc/StructuredEncoder.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.StructuredEncoder 3 | ========================== 4 | 5 | 6 | .. currentmodule:: pydecode.nlp 7 | .. autofunction:: StructuredEncoder 8 | 9 | Examples 10 | -------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode 16 | import pydecode.encoder 17 | import numpy as np 18 | .. code:: python 19 | 20 | tags = ["D", "V", "N", "A"] 21 | sentence = "the dog walked to the park".split() 22 | .. code:: python 23 | 24 | class TaggingEncoder(pydecode.encoder.StructuredEncoder): 25 | def __init__(self, tags, sentence): 26 | self.T = len(tags) 27 | self.n = len(sentence) 28 | shape = (self.n, self.T) 29 | super(TaggingEncoder, self).__init__(shape) 30 | 31 | def from_parts(self, parts): 32 | tag_sequence = np.zeros(self.n) 33 | for part in parts: 34 | tag_sequence[part[0]] = part[1] 35 | return tag_sequence 36 | 37 | def transform_structure(self, structure): 38 | parts = [] 39 | for i, t in enumerate(structure): 40 | parts.append((i,t)) 41 | return np.array(parts) 42 | 43 | encoder = TaggingEncoder(tags, sentence) 44 | .. code:: python 45 | 46 | tag_sequence = np.array([3,2,3,1,0, 2]) 47 | parts = encoder.transform_structure(tag_sequence) 48 | parts 49 | 50 | 51 | 52 | .. parsed-literal:: 53 | 54 | array([[0, 3], 55 | [1, 2], 56 | [2, 3], 57 | [3, 1], 58 | [4, 0], 59 | [5, 2]]) 60 | 61 | 62 | 63 | .. 
code:: python 64 | 65 | encoder.from_parts(parts) 66 | 67 | 68 | 69 | .. parsed-literal:: 70 | 71 | array([ 3., 2., 3., 1., 0., 2.]) 72 | 73 | 74 | 75 | .. code:: python 76 | 77 | labels = encoder.encoder[tuple(parts.T)] 78 | labels 79 | 80 | 81 | 82 | .. parsed-literal:: 83 | 84 | array([ 3, 6, 11, 13, 16, 22]) 85 | 86 | 87 | 88 | .. code:: python 89 | 90 | parts = encoder.transform_labels(labels) 91 | parts 92 | 93 | 94 | 95 | .. parsed-literal:: 96 | 97 | array([[0, 3], 98 | [1, 2], 99 | [2, 3], 100 | [3, 1], 101 | [4, 0], 102 | [5, 2]]) 103 | 104 | 105 | 106 | Invariants 107 | ---------- 108 | 109 | 110 | Transforming parts to labels and back to parts is the identity. 111 | 112 | .. code:: python 113 | 114 | def test_transform(): 115 | shape = (10, 15) 116 | encoder = pydecode.encoder.StructuredEncoder(shape) 117 | a = np.random.randint(10, size=10) 118 | b = np.random.randint(15, size=10) 119 | 120 | parts = np.vstack((a.T, b.T)).T 121 | labels = encoder.encoder[tuple(parts.T)] 122 | reparts = encoder.transform_labels(labels) 123 | assert (parts == reparts).all() 124 | test_transform() -------------------------------------------------------------------------------- /notebooks/doc/StructuredEncoder_files/StructuredEncoder_6_0.text: -------------------------------------------------------------------------------- 1 | array([[0, 3], 2 | [1, 2], 3 | [2, 3], 4 | [3, 1], 5 | [4, 0], 6 | [5, 2]]) -------------------------------------------------------------------------------- /notebooks/doc/StructuredEncoder_files/StructuredEncoder_7_0.text: -------------------------------------------------------------------------------- 1 | array([ 3., 2., 3., 1., 0., 2.]) -------------------------------------------------------------------------------- /notebooks/doc/StructuredEncoder_files/StructuredEncoder_8_0.text: -------------------------------------------------------------------------------- 1 | array([ 3, 6, 11, 13, 16, 22]) 
-------------------------------------------------------------------------------- /notebooks/doc/StructuredEncoder_files/StructuredEncoder_9_0.text: -------------------------------------------------------------------------------- 1 | array([[0, 3], 2 | [1, 2], 3 | [2, 3], 4 | [3, 1], 5 | [4, 0], 6 | [5, 2]]) -------------------------------------------------------------------------------- /notebooks/doc/Weights.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _weight_types: 3 | 4 | Weight Types 5 | ============ 6 | 7 | Each of these algorithms is parameterized over several 8 | different semirings. The ``weight_type`` argument is used to specify 9 | the semiring. 10 | 11 | ============== ============== =============== =============== =============== ======= 12 | Name |splus| |stimes| |szero| |sone| |stype| 13 | ============== ============== =============== =============== =============== ======= 14 | **LogViterbi** :math:`\max` :math:`+` |ninf| 0 float32 15 | **Viterbi** :math:`\max` :math:`*` 0 1 float32 16 | **Real** :math:`+` :math:`*` 0 1 float32 17 | **Log** logsum :math:`+` |ninf| 0 float32 18 | **Boolean** or and false true uint8 19 | **Counting** :math:`+` :math:`*` 0 1 int32 20 | **MinMax** :math:`\min` :math:`\max` |ninf| |inf| float32 21 | ============== ============== =============== =============== =============== ======= 22 | 23 | .. |stype| replace:: :math:`\mathbb{K}`/dtype 24 | .. |inf| replace:: :math:`\infty` 25 | .. |ninf| replace:: :math:`-\infty` 26 | .. |sone| replace:: :math:`\bar{1}` 27 | .. |szero| replace:: :math:`\bar{0}` 28 | .. |splus| replace:: :math:`\oplus` 29 | .. |stimes| replace:: :math:`\otimes` 30 | 31 | Bibliography 32 | ------------ 33 | 34 | 35 | 36 | .. bibliography:: ../../full.bib 37 | :filter: key in {"goodman1998parsing", "allauzen2007openfst"} 38 | :style: plain 39 | 40 | Invariants 41 | ---------- 42 | 43 | 44 | Check the semiring properties. 45 | 46 | .. 
code:: python 47 | 48 | import pydecode.test 49 | Check the additive and multiplicative identities. 50 | 51 | .. code:: python 52 | 53 | @pydecode.test.property() 54 | def test_weight_properties(graph, weights, weight_type): 55 | assert (weight_type.one() * weight_type.from_value(weights[0])).value == weights[0] 56 | assert (weight_type.zero() + weight_type.from_value(weights[0])).value == weights[0] 57 | test_weight_properties() -------------------------------------------------------------------------------- /notebooks/doc/best_path.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.best\_path 3 | =================== 4 | 5 | 6 | .. currentmodule:: pydecode 7 | .. autofunction:: best_path 8 | 9 | Example 10 | ------- 11 | 12 | 13 | This example creates a simple hypergraph with random integer weights, 14 | and highlights the best path in the hypergraph. 15 | 16 | .. code:: python 17 | 18 | import pydecode 19 | import pydecode.test.utils 20 | import numpy as np 21 | graph = pydecode.test.utils.simple_hypergraph() 22 | weights = np.random.randint(10, size=(len(graph.edges))) 23 | pydecode.draw(graph, weights) 24 | 25 | 26 | 27 | .. image:: best_path_files/best_path_4_0.png 28 | 29 | 30 | 31 | .. code:: python 32 | 33 | path = pydecode.best_path(graph, weights * 1.) 34 | pydecode.draw(graph, weights, paths=[path]) 35 | 36 | 37 | 38 | .. image:: best_path_files/best_path_5_0.png 39 | 40 | 41 | 42 | Bibliography 43 | ------------ 44 | 45 | 46 | .. bibliography:: ../../full.bib 47 | :filter: key in {"younger1967recognition"} 48 | :style: plain 49 | 50 | Invariants 51 | ---------- 52 | 53 | 54 | - Best path is equal to 55 | 56 | .. math:: \max_{x} W_{x_1} \otimes \cdots \otimes W_{x_n} 57 | 58 | .. code:: python 59 | 60 | import pydecode.test 61 | @pydecode.test.property(viterbi=True) 62 | def test_all_paths(graph, weights, weight_type): 63 | """ 64 | Compare best-path to exhaustive enumeration. 
65 | """ 66 | best_path = pydecode.best_path(graph, weights, weight_type) 67 | best_score = pydecode.score(best_path, weights, weight_type) 68 | match = False 69 | for path in pydecode.test.all_paths(graph): 70 | score = pydecode.score(path, weights, weight_type) 71 | assert best_score.value >= score.value 72 | if path == best_path: 73 | match = True 74 | pydecode.test.assert_almost_equal(best_score.value, score.value) 75 | assert match 76 | test_all_paths() -------------------------------------------------------------------------------- /notebooks/doc/best_path_files/best_path_2_0.text: -------------------------------------------------------------------------------- 1 | Help on function best_path in module pydecode: 2 | 3 | best_path(graph, weights, kind=, chart=None, back_pointers=None, mask=None) 4 | Find the best path through a hypergraph for a given set of weights. 5 | 6 | Formally gives 7 | :math:`\arg \max_{y \in {\cal X}} \theta^{\top} x` 8 | in the hypergraph. 9 | 10 | Parameters 11 | ---------- 12 | 13 | graph : :py:class:`Hypergraph` 14 | The underlying hypergraph :math:`({\cal V}, {\cal E})`. 15 | 16 | weights : Nx1 column vector (type depends on `kind`) 17 | The potential vector :math:`\theta` for each hyperedge. 18 | 19 | kind : A semiring type. 20 | The semiring to use. Must agree with weights. 21 | 22 | chart : Mx1 column vector. 23 | A chart buffer to reuse. 24 | 25 | Returns 26 | ------- 27 | path : :py:class:`Path` 28 | The best path :math:`\arg \max_{y \in {\cal X}} \theta^{\top} x`. 
29 | 30 | -------------------------------------------------------------------------------- /notebooks/doc/best_path_files/best_path_3_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/best_path_files/best_path_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/best_path_files/best_path_4_0.png -------------------------------------------------------------------------------- /notebooks/doc/best_path_files/best_path_4_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/best_path_files/best_path_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/best_path_files/best_path_5_0.png -------------------------------------------------------------------------------- /notebooks/doc/best_path_files/best_path_5_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/binarize.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.binarize 3 | ================= 4 | 5 | 6 | .. currentmodule:: pydecode 7 | .. autofunction:: binarize 8 | 9 | Examples 10 | -------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode, pydecode.test 16 | import numpy as np 17 | .. 
code:: python 18 | 19 | items = np.arange(10) 20 | chart = pydecode.ChartBuilder(items) 21 | chart.init(items[:4]) 22 | chart.set(items[5], [items[:4]], labels=[10]) 23 | graph = chart.finish() 24 | .. code:: python 25 | 26 | pydecode.draw(graph, graph.labeling, vertex_labels=None) 27 | 28 | 29 | 30 | .. image:: binarize_files/binarize_5_0.png 31 | 32 | 33 | 34 | .. code:: python 35 | 36 | new_graph = pydecode.binarize(graph) 37 | .. code:: python 38 | 39 | pydecode.draw(new_graph, new_graph.labeling, vertex_labels=None) 40 | 41 | 42 | 43 | .. image:: binarize_files/binarize_7_0.png 44 | 45 | 46 | 47 | Invariants 48 | ---------- 49 | 50 | 51 | .. code:: python 52 | 53 | 54 | Binarizing does not change best path score. 55 | 56 | .. code:: python 57 | 58 | @pydecode.test.property() 59 | def test_binarize(graph, weights, weight_type): 60 | binary_graph = pydecode.binarize(graph) 61 | size = np.max(graph.labeling) + 1 62 | label_weights = pydecode.test.random_weights(weight_type, size) 63 | 64 | new_weights = pydecode.transform(graph, label_weights, weight_type=weight_type) 65 | score1 = pydecode.inside(graph, new_weights, weight_type=weight_type)[graph.root.id] 66 | 67 | weights2 = pydecode.transform(binary_graph, label_weights, weight_type=weight_type) 68 | score2 = pydecode.inside(binary_graph, weights2, weight_type=weight_type)[graph.root.id] 69 | pydecode.test.assert_almost_equal(score1, score2) 70 | test_binarize() -------------------------------------------------------------------------------- /notebooks/doc/binarize_files/binarize_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/binarize_files/binarize_5_0.png -------------------------------------------------------------------------------- /notebooks/doc/binarize_files/binarize_5_0.text: -------------------------------------------------------------------------------- 1 
| -------------------------------------------------------------------------------- /notebooks/doc/binarize_files/binarize_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/binarize_files/binarize_7_0.png -------------------------------------------------------------------------------- /notebooks/doc/binarize_files/binarize_7_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/cfg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "heading", 12 | "level": 1, 13 | "source": [ 14 | "pydecode.nlp.cfg" 15 | ] 16 | }, 17 | { 18 | "cell_type": "raw", 19 | "source": [ 20 | ".. currentmodule:: pydecode.nlp\n", 21 | ".. 
autofunction:: cfg" 22 | ] 23 | }, 24 | { 25 | "cell_type": "heading", 26 | "level": 2, 27 | "source": [ 28 | "Examples" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "collapsed": false, 34 | "input": [ 35 | "import pydecode, pydecode.nlp, pydecode.test\n", 36 | "import numpy as np" 37 | ], 38 | "language": "python", 39 | "outputs": [], 40 | "prompt_number": 1 41 | }, 42 | { 43 | "cell_type": "heading", 44 | "level": 2, 45 | "source": [ 46 | "Bibliography" 47 | ] 48 | }, 49 | { 50 | "cell_type": "raw", 51 | "source": [] 52 | }, 53 | { 54 | "cell_type": "heading", 55 | "level": 2, 56 | "source": [ 57 | "Invariants" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "collapsed": false, 63 | "input": [ 64 | "def test_all_paths(sentence_length, grammar_size):\n", 65 | " graph, encoder = pydecode.nlp.cfg(sentence_length, \n", 66 | " grammar_size)\n", 67 | "\n", 68 | " # Generate all paths.\n", 69 | " p1 = np.array([encoder.transform_path(path).ravel()\n", 70 | " for path in pydecode.test.all_paths(graph)])\n", 71 | "\n", 72 | " # Generate all parses.\n", 73 | " p2 = np.array([parse.ravel()\n", 74 | " for parse in encoder.all_structures()])\n", 75 | " assert (p1[np.lexsort(p1.T)] == p2[np.lexsort(p2.T)]).all()\n", 76 | "\n", 77 | "for length in range(3, 6):\n", 78 | " for grammar in range(1, 3):\n", 79 | " test_all_paths(length, grammar)" 80 | ], 81 | "language": "python", 82 | "outputs": [], 83 | "prompt_number": 2 84 | } 85 | ] 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /notebooks/doc/cfg.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.nlp.cfg 3 | ================ 4 | 5 | 6 | .. currentmodule:: pydecode.nlp 7 | .. autofunction:: cfg 8 | 9 | Examples 10 | -------- 11 | 12 | 13 | .. 
code:: python 14 | 15 | import pydecode, pydecode.nlp, pydecode.test 16 | import numpy as np 17 | Bibliography 18 | ------------ 19 | 20 | 21 | 22 | 23 | Invariants 24 | ---------- 25 | 26 | 27 | .. code:: python 28 | 29 | def test_all_paths(sentence_length, grammar_size): 30 | graph, encoder = pydecode.nlp.cfg(sentence_length, 31 | grammar_size) 32 | 33 | # Generate all paths. 34 | p1 = np.array([encoder.transform_path(path).ravel() 35 | for path in pydecode.test.all_paths(graph)]) 36 | 37 | # Generate all parses. 38 | p2 = np.array([parse.ravel() 39 | for parse in encoder.all_structures()]) 40 | assert (p1[np.lexsort(p1.T)] == p2[np.lexsort(p2.T)]).all() 41 | 42 | for length in range(3, 6): 43 | for grammar in range(1, 3): 44 | test_all_paths(length, grammar) -------------------------------------------------------------------------------- /notebooks/doc/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os,sys 3 | 4 | wrapped_stdin = sys.stdin 5 | sys.stdin = sys.__stdin__ 6 | from IPython.kernel import KernelManager 7 | sys.stdin = wrapped_stdin 8 | 9 | from IPython.nbformat.current import reads 10 | 11 | # combined from 12 | # http://pytest.org/latest/example/nonpython.html#non-python-tests 13 | # and 14 | # https://gist.github.com/2621679 by minrk 15 | 16 | # tests = glob.glob(*.ipynb) 17 | 18 | 19 | def pytest_collect_file(path, parent): 20 | print path 21 | if path.ext == ".ipynb": 22 | return IPyNbFile(path, parent) 23 | 24 | class IPyNbFile(pytest.File): 25 | def collect(self): 26 | with self.fspath.open() as f: 27 | self.nb = reads(f.read(), 'json') 28 | 29 | cell_num = 0 30 | 31 | for ws in self.nb.worksheets: 32 | for cell in ws.cells: 33 | if cell.cell_type == "code": 34 | yield IPyNbCell(self.name, self, cell_num, cell) 35 | cell_num += 1 36 | 37 | def setup(self): 38 | self.km = KernelManager() 39 | self.km.start_kernel(stderr=open(os.devnull, 'w')) 40 | self.kc = 
self.km.client() 41 | self.kc.start_channels() 42 | self.shell = self.kc.shell_channel 43 | 44 | def teardown(self): 45 | self.km.shutdown_kernel() 46 | del self.shell 47 | del self.km 48 | 49 | class IPyNbCell(pytest.Item): 50 | def __init__(self, name, parent, cell_num, cell): 51 | super(IPyNbCell, self).__init__(name, parent) 52 | 53 | self.cell_num = cell_num 54 | self.cell = cell 55 | 56 | def runtest(self): 57 | print "running" 58 | shell = self.parent.shell 59 | shell.execute(self.cell.input, allow_stdin=False) 60 | reply = shell.get_msg(timeout=20)['content'] 61 | if reply['status'] == 'error': 62 | raise IPyNbException(self.cell_num, self.cell.input, '\n'.join(reply['traceback'])) 63 | 64 | def repr_failure(self, excinfo): 65 | """ called when self.runtest() raises an exception. """ 66 | if isinstance(excinfo.value, IPyNbException): 67 | return "\n".join([ 68 | "notebook worksheet execution failed", 69 | " cell %s\n\n" 70 | " input: %s\n\n" 71 | " raised: %s\n" % excinfo.value.args[0:3], 72 | ]) 73 | 74 | 75 | def reportinfo(self): 76 | return self.fspath, 0, "cell %d" % self.cell_num 77 | 78 | class IPyNbException(Exception): 79 | """ custom exception for error reporting. """ 80 | -------------------------------------------------------------------------------- /notebooks/doc/draw.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.draw 3 | ============= 4 | 5 | 6 | .. currentmodule:: pydecode 7 | .. autofunction:: draw 8 | 9 | Example 10 | ------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode, pydecode.test 16 | import numpy as np 17 | graph = pydecode.test.random_hypergraph(5) 18 | words = np.array("it was the best of times it was the worst of times".split()) 19 | edge_labels = words[np.random.randint(words.shape[0]-1, size=len(graph.edges))] 20 | pydecode.draw(graph, edge_labels) 21 | 22 | 23 | 24 | .. image:: draw_files/draw_3_0.png 25 | 26 | 27 | 28 | .. 
code:: python 29 | 30 | path1 = pydecode.test.utils.random_path(graph) 31 | path2 = pydecode.test.utils.random_path(graph) 32 | path3 = pydecode.test.utils.random_path(graph) 33 | pydecode.draw(graph, edge_labels, paths=[path1, path2, path3]) 34 | 35 | 36 | 37 | .. image:: draw_files/draw_4_0.png 38 | 39 | 40 | 41 | Custom formatters inherit from ``HypergraphFormatter`` in 42 | pydecode.display. These allow you to specify GraphViz properties 43 | directly. 44 | 45 | .. code:: python 46 | 47 | from pydecode.display import HypergraphFormatter 48 | class MyFormatter(HypergraphFormatter): 49 | def graph_attrs(self): 50 | return {"rankdir": "TB"} 51 | def hypernode_attrs(self, vertex): 52 | return {"shape": "point"} 53 | def hyperedge_node_attrs(self, edge): 54 | label = "%s"%(self.edge_labels[edge.id]) 55 | if label[0] == "w": 56 | color = "blue" 57 | else: 58 | color = "pink" 59 | return {"shape": "circle", 60 | "color": color, 61 | "label": label} 62 | 63 | pydecode.draw(graph, edge_labels, formatter=MyFormatter()) 64 | 65 | 66 | 67 | .. 
image:: draw_files/draw_6_0.png 68 | 69 | 70 | -------------------------------------------------------------------------------- /notebooks/doc/draw_files/draw_3_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/draw_files/draw_3_0.png -------------------------------------------------------------------------------- /notebooks/doc/draw_files/draw_3_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/draw_files/draw_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/draw_files/draw_4_0.png -------------------------------------------------------------------------------- /notebooks/doc/draw_files/draw_4_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/draw_files/draw_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/draw_files/draw_6_0.png -------------------------------------------------------------------------------- /notebooks/doc/draw_files/draw_6_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/eisner.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.nlp.eisner 3 | =================== 4 | 5 | 6 | .. currentmodule:: pydecode.nlp 7 | .. 
autofunction:: eisner 8 | 9 | Example 10 | ------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode, pydecode.nlp, pydecode.test 16 | import numpy as np 17 | graph, encoder = pydecode.nlp.eisner(2, order=1) 18 | .. code:: python 19 | 20 | labels = np.array(graph.labeling) 21 | labels[labels==-1] = 0 22 | pydecode.draw(graph, encoder.transform_labels(labels)) 23 | 24 | 25 | 26 | .. image:: eisner_files/eisner_4_0.png 27 | 28 | 29 | 30 | .. code:: python 31 | 32 | path = pydecode.test.random_path(graph) 33 | parse = encoder.transform_path(path) 34 | parse 35 | 36 | 37 | 38 | .. parsed-literal:: 39 | 40 | array([-1, 0, 1]) 41 | 42 | 43 | 44 | Bibliography 45 | ------------ 46 | 47 | 48 | .. bibliography:: ../../full.bib 49 | :filter: key in {"eisner1999efficient", "mcdonaldthesis"} 50 | :style: plain 51 | 52 | Invariants 53 | ---------- 54 | 55 | 56 | Check that each dependency parse is in the hypergraph. 57 | 58 | .. code:: python 59 | 60 | def test_all_paths(sentence_length, order): 61 | graph, encoder = pydecode.nlp.eisner(sentence_length, order) 62 | 63 | # Generate all paths. 64 | p1 = np.array([encoder.transform_path(path) 65 | for path in pydecode.test.all_paths(graph)]) 66 | 67 | # Generate all parses. 
68 | p2 = np.array([parse 69 | for parse in encoder.all_structures()]) 70 | 71 | assert (p1[np.lexsort(p1.T)] == p2[np.lexsort(p2.T)]).all() 72 | 73 | for length in range(3, 6): 74 | for order in [1, 2]: 75 | test_all_paths(length, order) -------------------------------------------------------------------------------- /notebooks/doc/eisner_files/eisner_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/eisner_files/eisner_4_0.png -------------------------------------------------------------------------------- /notebooks/doc/eisner_files/eisner_4_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/eisner_files/eisner_5_0.text: -------------------------------------------------------------------------------- 1 | array([-1, 0, 1]) -------------------------------------------------------------------------------- /notebooks/doc/filter.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.filter 3 | =============== 4 | 5 | 6 | .. currentmodule:: pydecode 7 | .. autofunction:: filter 8 | 9 | Examples 10 | -------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode 16 | import numpy as np 17 | import pydecode.test.utils 18 | .. code:: python 19 | 20 | graph = pydecode.test.utils.simple_hypergraph() 21 | pydecode.draw(graph, labels=True) 22 | 23 | 24 | 25 | .. image:: filter_files/filter_4_0.png 26 | 27 | 28 | 29 | .. code:: python 30 | 31 | weights = pydecode.transform(graph, np.array([100, -10, 50])) 32 | filtered_graph = pydecode.filter(graph, np.array(weights >=0.0, dtype=np.uint8)) 33 | .. code:: python 34 | 35 | pydecode.draw(filtered_graph, labels=True) 36 | 37 | 38 | 39 | .. 
image:: filter_files/filter_6_0.png 40 | 41 | 42 | 43 | Invariants 44 | ---------- 45 | 46 | -------------------------------------------------------------------------------- /notebooks/doc/filter_files/filter_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/filter_files/filter_4_0.png -------------------------------------------------------------------------------- /notebooks/doc/filter_files/filter_4_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/filter_files/filter_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/filter_files/filter_6_0.png -------------------------------------------------------------------------------- /notebooks/doc/filter_files/filter_6_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/inside_files/inside_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/inside_files/inside_4_0.png -------------------------------------------------------------------------------- /notebooks/doc/inside_files/inside_4_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/inside_files/inside_5_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/inside_files/inside_5_0.png -------------------------------------------------------------------------------- /notebooks/doc/inside_files/inside_5_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/inside_files/inside_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/inside_files/inside_6_0.png -------------------------------------------------------------------------------- /notebooks/doc/inside_files/inside_6_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/inside_files/inside_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/inside_files/inside_7_0.png -------------------------------------------------------------------------------- /notebooks/doc/inside_files/inside_7_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/intersect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "heading", 12 | "level": 1, 13 | "source": [ 14 | "pydecode.intersect" 15 | ] 16 | }, 17 | { 18 | "cell_type": "raw", 19 | "source": [ 20 | ".. note::\n", 21 | " ``intersect`` is in alpha state. 
The interface will likely change. " 22 | ] 23 | }, 24 | { 25 | "cell_type": "raw", 26 | "source": [ 27 | ".. currentmodule:: pydecode \n", 28 | ".. autofunction:: intersect " 29 | ] 30 | } 31 | ] 32 | } 33 | ] 34 | } -------------------------------------------------------------------------------- /notebooks/doc/intersect.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.intersect 3 | ================== 4 | 5 | 6 | .. note:: 7 | ``intersect`` is in alpha state. The interface will likely change. 8 | 9 | .. currentmodule:: pydecode 10 | .. autofunction:: intersect 11 | -------------------------------------------------------------------------------- /notebooks/doc/kbest.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.kbest 3 | ============== 4 | 5 | 6 | .. currentmodule:: pydecode 7 | .. autofunction:: kbest 8 | 9 | Example 10 | ------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode, pydecode.test 16 | import numpy as np 17 | graph = pydecode.test.simple_hypergraph() 18 | weights = np.random.randint(10, size=len(graph.edges)) 19 | .. code:: python 20 | 21 | pydecode.draw(graph, weights) 22 | 23 | 24 | 25 | .. image:: kbest_files/kbest_4_0.png 26 | 27 | 28 | 29 | .. code:: python 30 | 31 | paths = pydecode.kbest(graph, 1.*weights, 10) 32 | weights * paths[0].v, weights * paths[1].v 33 | 34 | 35 | 36 | .. parsed-literal:: 37 | 38 | (array([11]), array([4])) 39 | 40 | 41 | 42 | .. code:: python 43 | 44 | pydecode.draw(graph, weights, paths=paths) 45 | 46 | 47 | 48 | .. image:: kbest_files/kbest_6_0.png 49 | 50 | 51 | 52 | Bibliography 53 | ------------ 54 | 55 | 56 | .. bibliography:: ../../full.bib 57 | :filter: key in {"huang2005better"} 58 | :style: plain 59 | 60 | Invariants 61 | ---------- 62 | 63 | 64 | .. code:: python 65 | 66 | 67 | First-best is the same as best-path. 68 | 69 | .. 
code:: python 70 | 71 | @pydecode.test.property(viterbi=True) 72 | def test_k_first_best(graph, weights, weight_type): 73 | paths = pydecode.kbest(graph, weights, 5) 74 | path = pydecode.best_path(graph, weights) 75 | assert(weights.T * paths[0].v == weights.T * path.v) 76 | assert(paths[0] == path) 77 | test_k_first_best() 78 | Check that k-best paths exhaustively. 79 | 80 | .. code:: python 81 | 82 | @pydecode.test.property(viterbi=True) 83 | def test_k_best(graph, weights, weight_type): 84 | paths = pydecode.kbest(graph, weights, 5) 85 | k_best_path_scores = [] 86 | for path2 in pydecode.test.all_paths(graph): 87 | score = pydecode.score(path2, weights, pydecode.LogViterbi) 88 | k_best_path_scores.append(score.value) 89 | k_best_path_scores.sort(reverse=True) 90 | k_best_path_scores = k_best_path_scores[:5] 91 | 92 | k_best_scores = [weights.T * path.v for path in paths] 93 | assert k_best_path_scores == k_best_scores 94 | test_k_best() -------------------------------------------------------------------------------- /notebooks/doc/kbest_files/kbest_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/kbest_files/kbest_4_0.png -------------------------------------------------------------------------------- /notebooks/doc/kbest_files/kbest_4_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/kbest_files/kbest_5_0.text: -------------------------------------------------------------------------------- 1 | (array([11]), array([4])) -------------------------------------------------------------------------------- /notebooks/doc/kbest_files/kbest_6_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/kbest_files/kbest_6_0.png -------------------------------------------------------------------------------- /notebooks/doc/kbest_files/kbest_6_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/lp.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.lp 3 | =========== 4 | 5 | 6 | .. currentmodule:: pydecode 7 | .. autofunction:: lp 8 | 9 | .. currentmodule:: pydecode.lp 10 | .. autoclass:: HypergraphLP 11 | 12 | Examples 13 | -------- 14 | 15 | 16 | .. code:: python 17 | 18 | import pydecode, pydecode.test 19 | import numpy as np 20 | .. code:: python 21 | 22 | graph = pydecode.test.simple_hypergraph() 23 | weights = np.random.randint(10, size=len(graph.edges)) 24 | pydecode.draw(graph, weights) 25 | 26 | 27 | 28 | .. image:: lp_files/lp_5_0.png 29 | 30 | 31 | 32 | .. code:: python 33 | 34 | linear_program = pydecode.lp(graph, weights * 1.) 35 | linear_program.solve() 36 | pydecode.draw(graph, weights, paths=[linear_program.path]) 37 | 38 | 39 | 40 | .. image:: lp_files/lp_6_0.png 41 | 42 | 43 | 44 | .. code:: python 45 | 46 | print linear_program.lp 47 | 48 | .. 
parsed-literal:: 49 | 50 | Hypergraph Problem: 51 | MAXIMIZE 52 | 4.0*edge_0 + 4.0*edge_1 + 9.0*edge_2 + 0.0 53 | SUBJECT TO 54 | _C1: node_5 = 1 55 | 56 | _C2: - edge_0 - edge_1 + node_4 = 0 57 | 58 | _C3: - edge_2 + node_5 = 0 59 | 60 | _C4: - edge_0 + node_0 = 0 61 | 62 | _C5: - edge_0 - edge_1 + node_1 = 0 63 | 64 | _C6: - edge_1 + node_2 = 0 65 | 66 | _C7: - edge_2 + node_3 = 0 67 | 68 | _C8: - edge_2 + node_4 = 0 69 | 70 | _C9: - edge_2 + label_0 = 0 71 | 72 | _C10: - edge_2 + label_1 = 0 73 | 74 | _C11: - edge_2 + label_2 = 0 75 | 76 | VARIABLES 77 | edge_0 <= 1 Continuous 78 | edge_1 <= 1 Continuous 79 | edge_2 <= 1 Continuous 80 | label_0 <= 1 Continuous 81 | label_1 <= 1 Continuous 82 | label_2 <= 1 Continuous 83 | node_0 <= 1 Continuous 84 | node_1 <= 1 Continuous 85 | node_2 <= 1 Continuous 86 | node_3 <= 1 Continuous 87 | node_4 <= 1 Continuous 88 | node_5 <= 1 Continuous 89 | 90 | 91 | 92 | Bibliography 93 | ------------ 94 | 95 | 96 | .. bibliography:: ../../full.bib 97 | :filter: key in {"martin1990"} 98 | :style: plain 99 | 100 | Invariants 101 | ---------- 102 | 103 | 104 | Check that linear program always gives the same result as best path. 105 | 106 | .. 
code:: python 107 | 108 | @pydecode.test.property(viterbi=True) 109 | def test_linear_programming_solver(graph, weights, weight_type): 110 | best_path = pydecode.best_path(graph, weights) 111 | best_path_score = best_path.v.T * weights 112 | linear_program = pydecode.lp(graph, weights) 113 | linear_program.solve() 114 | assert linear_program.path == best_path 115 | assert best_path_score == linear_program.objective 116 | test_linear_programming_solver() -------------------------------------------------------------------------------- /notebooks/doc/lp_files/lp_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/lp_files/lp_5_0.png -------------------------------------------------------------------------------- /notebooks/doc/lp_files/lp_5_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/lp_files/lp_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/lp_files/lp_6_0.png -------------------------------------------------------------------------------- /notebooks/doc/lp_files/lp_6_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/lp_files/lp_7_0.text: -------------------------------------------------------------------------------- 1 | Hypergraph Problem: 2 | MAXIMIZE 3 | 4.0*edge_0 + 4.0*edge_1 + 9.0*edge_2 + 0.0 4 | SUBJECT TO 5 | _C1: node_5 = 1 6 | 7 | _C2: - edge_0 - edge_1 + node_4 = 0 8 | 9 | _C3: - edge_2 + node_5 = 0 10 | 11 | _C4: - edge_0 + node_0 = 0 12 | 13 | _C5: - edge_0 - edge_1 + node_1 = 0 14 | 15 | 
_C6: - edge_1 + node_2 = 0 16 | 17 | _C7: - edge_2 + node_3 = 0 18 | 19 | _C8: - edge_2 + node_4 = 0 20 | 21 | _C9: - edge_2 + label_0 = 0 22 | 23 | _C10: - edge_2 + label_1 = 0 24 | 25 | _C11: - edge_2 + label_2 = 0 26 | 27 | VARIABLES 28 | edge_0 <= 1 Continuous 29 | edge_1 <= 1 Continuous 30 | edge_2 <= 1 Continuous 31 | label_0 <= 1 Continuous 32 | label_1 <= 1 Continuous 33 | label_2 <= 1 Continuous 34 | node_0 <= 1 Continuous 35 | node_1 <= 1 Continuous 36 | node_2 <= 1 Continuous 37 | node_3 <= 1 Continuous 38 | node_4 <= 1 Continuous 39 | node_5 <= 1 Continuous 40 | 41 | -------------------------------------------------------------------------------- /notebooks/doc/marginals.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "heading", 12 | "level": 1, 13 | "source": [ 14 | "pydecode.marginals" 15 | ] 16 | }, 17 | { 18 | "cell_type": "raw", 19 | "source": [ 20 | ".. currentmodule:: pydecode \n", 21 | ".. autofunction:: marginals " 22 | ] 23 | }, 24 | { 25 | "cell_type": "heading", 26 | "level": 2, 27 | "source": [ 28 | "Example" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "collapsed": false, 34 | "input": [ 35 | "import pydecode, pydecode.test\n", 36 | "import numpy as np" 37 | ], 38 | "language": "python", 39 | "outputs": [], 40 | "prompt_number": 1 41 | }, 42 | { 43 | "cell_type": "heading", 44 | "level": 2, 45 | "source": [ 46 | "Invariants" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "collapsed": false, 52 | "input": [], 53 | "language": "python", 54 | "outputs": [], 55 | "prompt_number": 12 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "source": [ 60 | "Marginals represent to the sum of all paths through each edge." 
61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "collapsed": false, 66 | "input": [ 67 | "@pydecode.test.property()\n", 68 | "def test_all_marginals(graph, weights, weight_type):\n", 69 | " marginals = pydecode.marginals(graph, weights, weight_type=weight_type)\n", 70 | " marginals2 = [weight_type.Value.zero()] * len(graph.edges)\n", 71 | " for path in pydecode.test.all_paths(graph):\n", 72 | " score = pydecode.score(path, weights, weight_type)\n", 73 | " for edge in path:\n", 74 | " marginals2[edge.id] += score\n", 75 | " \n", 76 | " marginals2 = np.array([m.value for m in marginals2])\n", 77 | " pydecode.test.assert_almost_equal(marginals, \n", 78 | " marginals2, 5)\n", 79 | "test_all_marginals()" 80 | ], 81 | "language": "python", 82 | "outputs": [], 83 | "prompt_number": 16 84 | } 85 | ] 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /notebooks/doc/marginals.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.marginals 3 | ================== 4 | 5 | 6 | .. currentmodule:: pydecode 7 | .. autofunction:: marginals 8 | 9 | Example 10 | ------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode, pydecode.test 16 | import numpy as np 17 | Invariants 18 | ---------- 19 | 20 | 21 | .. code:: python 22 | 23 | 24 | Marginals represent to the sum of all paths through each edge. 25 | 26 | .. 
code:: python 27 | 28 | @pydecode.test.property() 29 | def test_all_marginals(graph, weights, weight_type): 30 | marginals = pydecode.marginals(graph, weights, weight_type=weight_type) 31 | marginals2 = [weight_type.Value.zero()] * len(graph.edges) 32 | for path in pydecode.test.all_paths(graph): 33 | score = pydecode.score(path, weights, weight_type) 34 | for edge in path: 35 | marginals2[edge.id] += score 36 | 37 | marginals2 = np.array([m.value for m in marginals2]) 38 | pydecode.test.assert_almost_equal(marginals, 39 | marginals2, 5) 40 | test_all_marginals() -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/outside_files/outside_10_0.png -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_10_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_11_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/outside_files/outside_11_0.png -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_11_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_4_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/outside_files/outside_4_0.png -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_4_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/outside_files/outside_5_0.png -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_5_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/outside_files/outside_6_0.png -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_6_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/outside_files/outside_7_0.png -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_7_0.text: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/outside_files/outside_8_0.png -------------------------------------------------------------------------------- /notebooks/doc/outside_files/outside_8_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/semimarkov.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "heading", 12 | "level": 1, 13 | "source": [ 14 | "pydecode.nlp.semimarkov" 15 | ] 16 | }, 17 | { 18 | "cell_type": "raw", 19 | "source": [ 20 | ".. currentmodule:: pydecode.nlp\n", 21 | ".. autofunction:: semimarkov" 22 | ] 23 | }, 24 | { 25 | "cell_type": "heading", 26 | "level": 2, 27 | "source": [ 28 | "Example" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "collapsed": false, 34 | "input": [ 35 | "import pydecode" 36 | ], 37 | "language": "python", 38 | "outputs": [] 39 | }, 40 | { 41 | "cell_type": "heading", 42 | "level": 2, 43 | "source": [ 44 | "Bibliography" 45 | ] 46 | }, 47 | { 48 | "cell_type": "raw", 49 | "source": [ 50 | ".. 
bibliography:: ../../full.bib \n", 51 | " :filter: key in {\"sarawagi2004semi\"}\n", 52 | " :style: plain" 53 | ] 54 | } 55 | ] 56 | } 57 | ] 58 | } -------------------------------------------------------------------------------- /notebooks/doc/semimarkov.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.nlp.semimarkov 3 | ======================= 4 | 5 | 6 | .. currentmodule:: pydecode.nlp 7 | .. autofunction:: semimarkov 8 | 9 | Example 10 | ------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode 16 | Bibliography 17 | ------------ 18 | 19 | 20 | .. bibliography:: ../../full.bib 21 | :filter: key in {"sarawagi2004semi"} 22 | :style: plain 23 | -------------------------------------------------------------------------------- /notebooks/doc/tagger.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.nlp.tagger 3 | =================== 4 | 5 | 6 | .. currentmodule:: pydecode.nlp 7 | .. autofunction:: tagger 8 | 9 | Example 10 | ------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode, pydecode.nlp, pydecode.test 16 | graph, encoder = pydecode.nlp.tagger(3, [2, 2, 1], order=1) 17 | .. code:: python 18 | 19 | graph.labeling 20 | 21 | 22 | 23 | .. parsed-literal:: 24 | 25 | array([ 4, 6, 5, 7, 8, 10], dtype=int32) 26 | 27 | 28 | 29 | .. code:: python 30 | 31 | pydecode.draw(graph, encoder.transform_labels(graph.labeling)) 32 | 33 | 34 | 35 | .. image:: tagger_files/tagger_5_0.png 36 | 37 | 38 | 39 | Bibliography 40 | ------------ 41 | 42 | 43 | Invariants 44 | ---------- 45 | 46 | 47 | Try all possible taggings. 48 | 49 | .. code:: python 50 | 51 | import pydecode 52 | import numpy as np 53 | def test_all_paths(sentence_length): 54 | K = [10] * (sentence_length-1) + [1] 55 | graph, encoder = pydecode.nlp.tagger(sentence_length, K) 56 | 57 | # Generate all paths. 
58 | p1 = np.array([encoder.transform_path(path) 59 | for path in pydecode.test.all_paths(graph)]) 60 | 61 | # Generate all parses. 62 | p2 = np.array([parse 63 | for parse in encoder.all_structures()]) 64 | 65 | assert (p1[np.lexsort(p1.T)] == p2[np.lexsort(p2.T)]).all() 66 | 67 | for i in range(2, 6): 68 | test_all_paths(i) -------------------------------------------------------------------------------- /notebooks/doc/tagger_files/tagger_4_0.text: -------------------------------------------------------------------------------- 1 | array([ 4, 6, 5, 7, 8, 10], dtype=int32) -------------------------------------------------------------------------------- /notebooks/doc/tagger_files/tagger_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/tagger_files/tagger_5_0.png -------------------------------------------------------------------------------- /notebooks/doc/tagger_files/tagger_5_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/training.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.model.DynamicProgrammingModel 3 | ====================================== 4 | 5 | 6 | .. currentmodule:: pydecode.nlp 7 | .. autofunction:: StructuredEncoder 8 | 9 | Examples 10 | -------- 11 | 12 | 13 | .. code:: python 14 | 15 | import pydecode 16 | import pydecode.nlp 17 | from pydecode.model import DynamicProgrammingModel, HammingLossModel 18 | from pystruct.learners import StructuredPerceptron 19 | import numpy as np 20 | .. code:: python 21 | 22 | tags = ["D", "V", "N", "P"] 23 | n_tags = len(tags) 24 | n_words = 10 25 | .. 
code:: python 26 | 27 | class SimpleTagModel(HammingLossModel, DynamicProgrammingModel): 28 | def templates(self): 29 | return [(n_tags, n_tags), 30 | (n_tags, n_words), 31 | (n_tags, n_tags, n_words)] 32 | 33 | def parts_features(self, x, parts): 34 | x_arr = np.array(x) 35 | return [(parts[:,1], parts[:,2]), 36 | (parts[:,1], x_arr[parts[:,0]]), 37 | (parts[:,1], parts[:,2], x_arr[parts[:,0]])] 38 | 39 | def dynamic_program(self, x): 40 | n = len(x) 41 | return pydecode.nlp.tagger(n, [1]+[len(tags)] * (n-2) +[1]) 42 | .. code:: python 43 | 44 | model = SimpleTagModel() 45 | sp = StructuredPerceptron(model, verbose=True, max_iter=10, average=True) 46 | .. code:: python 47 | 48 | X = [(0,1,2,3, 0), (0,2,2,3,0)] 49 | Y = [(0,1,2,3, 0), (0,2,2,3,0)] 50 | sp.fit(X, Y) 51 | None 52 | 53 | .. parsed-literal:: 54 | 55 | iteration 0 56 | avg loss: 0.400000 w: [[-1.] 57 | [ 0.] 58 | ..., 59 | [ 0.] 60 | [ 0.]] 61 | effective learning rate: 1.000000 62 | iteration 1 63 | avg loss: 0.200000 w: [[-1.] 64 | [ 0.] 65 | ..., 66 | [ 0.] 67 | [ 0.]] 68 | effective learning rate: 1.000000 69 | iteration 2 70 | avg loss: 0.200000 w: [[-1.] 71 | [ 0.] 72 | ..., 73 | [ 0.] 74 | [ 0.]] 75 | effective learning rate: 1.000000 76 | iteration 3 77 | avg loss: 0.000000 w: [[-1.] 78 | [ 0.] 79 | ..., 80 | [ 0.] 81 | [ 0.]] 82 | effective learning rate: 1.000000 83 | Loss zero. Stopping. 84 | 85 | 86 | .. code:: python 87 | 88 | sp.predict(X) 89 | 90 | 91 | 92 | .. parsed-literal:: 93 | 94 | [array([0, 1, 2, 3, 0], dtype=int32), array([0, 2, 2, 3, 0], dtype=int32)] 95 | 96 | 97 | 98 | Bibliography 99 | ------------ 100 | 101 | 102 | .. bibliography:: ../../full.bib 103 | :filter: key in {"collins02perc"} 104 | :style: plain 105 | 106 | Invariants 107 | ---------- 108 | 109 | 110 | .. 
code:: python 111 | 112 | -------------------------------------------------------------------------------- /notebooks/doc/training_files/training_7_0.text: -------------------------------------------------------------------------------- 1 | iteration 0 2 | avg loss: 0.400000 w: [[-1.] 3 | [ 0.] 4 | ..., 5 | [ 0.] 6 | [ 0.]] 7 | effective learning rate: 1.000000 8 | iteration 1 9 | avg loss: 0.200000 w: [[-1.] 10 | [ 0.] 11 | ..., 12 | [ 0.] 13 | [ 0.]] 14 | effective learning rate: 1.000000 15 | iteration 2 16 | avg loss: 0.200000 w: [[-1.] 17 | [ 0.] 18 | ..., 19 | [ 0.] 20 | [ 0.]] 21 | effective learning rate: 1.000000 22 | iteration 3 23 | avg loss: 0.000000 w: [[-1.] 24 | [ 0.] 25 | ..., 26 | [ 0.] 27 | [ 0.]] 28 | effective learning rate: 1.000000 29 | Loss zero. Stopping. 30 | -------------------------------------------------------------------------------- /notebooks/doc/training_files/training_8_0.text: -------------------------------------------------------------------------------- 1 | [array([0, 1, 2, 3, 0], dtype=int32), array([0, 2, 2, 3, 0], dtype=int32)] -------------------------------------------------------------------------------- /notebooks/doc/transform.rst: -------------------------------------------------------------------------------- 1 | 2 | pydecode.transform 3 | ================== 4 | 5 | 6 | .. currentmodule:: pydecode 7 | .. autofunction:: transform 8 | 9 | .. currentmodule:: pydecode 10 | .. autofunction:: inverse_transform 11 | 12 | Examples 13 | -------- 14 | 15 | 16 | .. code:: python 17 | 18 | import pydecode 19 | import numpy as np 20 | .. code:: python 21 | 22 | items = np.arange(11) 23 | chart = pydecode.ChartBuilder(items) 24 | chart.init(items[0]) 25 | chart.set(items[5], [[0] for i in range(5)], np.arange(5) % 3) 26 | chart.set(items[10], [[5] for i in range(5)], np.arange(5) % 3) 27 | graph = chart.finish() 28 | pydecode.draw(graph, labels=True) 29 | 30 | 31 | 32 | .. 
image:: transform_files/transform_5_0.png 33 | 34 | 35 | 36 | .. code:: python 37 | 38 | values = np.array([-100, 0, 100]) 39 | weights = pydecode.transform(graph, values) 40 | pydecode.draw(graph, weights) 41 | 42 | 43 | 44 | .. image:: transform_files/transform_6_0.png 45 | 46 | 47 | 48 | .. code:: python 49 | 50 | marginals = pydecode.marginals(graph, values * 1.) 51 | print pydecode.inverse_transform(graph, marginals) 52 | 53 | .. parsed-literal:: 54 | 55 | [ 2.63066590e+267 2.63066590e+267 2.63066590e+267] 56 | 57 | 58 | Invariants 59 | ---------- 60 | 61 | 62 | Check that the transforms are invertible. 63 | 64 | .. code:: python 65 | 66 | import numpy.testing as test 67 | import pydecode.test 68 | Weight transform. 69 | 70 | .. code:: python 71 | 72 | @pydecode.test.property() 73 | def test_weight_transform(graph, weights, weight_type): 74 | size = np.max(graph.labeling) + 1 75 | label_weights = pydecode.test.random_weights(weight_type, size) 76 | edge_weights = pydecode.transform(graph, label_weights, weight_type=weight_type) 77 | weights2 = np.zeros(len(graph.edges)) 78 | weights2.fill(weight_type.Value.one_raw()) 79 | for i, label in enumerate(graph.labeling): 80 | if label == -1: continue 81 | weights2[i] = label_weights[label] 82 | test.assert_almost_equal(edge_weights, weights2, 5) 83 | test_weight_transform() 84 | Reverse transform. 85 | 86 | .. 
code:: python 87 | 88 | @pydecode.test.property() 89 | def test_reverse_weight_transform(graph, weights, weight_type): 90 | size = np.max(graph.labeling) + 1 91 | weights = pydecode.test.random_weights(weight_type, len(graph.edges)) 92 | label_weights = pydecode.inverse_transform(graph, weights, weight_type=weight_type) 93 | label_weights2 = [weight_type.Value.zero()] * size 94 | for i, label in enumerate(graph.labeling): 95 | if label == -1: continue 96 | label_weights2[label] += weight_type.Value(weights[i]) 97 | label_weights2 = np.array([weight.value for weight in label_weights2]) 98 | test.assert_almost_equal(label_weights, label_weights2, 5) 99 | test_reverse_weight_transform() -------------------------------------------------------------------------------- /notebooks/doc/transform_files/transform_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/transform_files/transform_5_0.png -------------------------------------------------------------------------------- /notebooks/doc/transform_files/transform_5_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/transform_files/transform_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/doc/transform_files/transform_6_0.png -------------------------------------------------------------------------------- /notebooks/doc/transform_files/transform_6_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/doc/transform_files/transform_7_0.text: 
-------------------------------------------------------------------------------- 1 | [ 2.63066590e+267 2.63066590e+267 2.63066590e+267] 2 | -------------------------------------------------------------------------------- /notebooks/fill_files/fill_3_0.text: -------------------------------------------------------------------------------- 1 | 50 tokens 45 labels 50594 2 | 50 tokens 45 labels (pydecode) 43898 3 | 4 | 50 tokens 12 labels 596459 5 | 50 tokens 12 labels (pydecode) 422825 6 | -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_11_0.text: -------------------------------------------------------------------------------- 1 | array([[1, 1, 0], 2 | [2, 2, 1], 3 | [3, 3, 2], 4 | [4, 1, 3], 5 | [5, 2, 1], 6 | [6, 3, 2], 7 | [7, 4, 3]]) -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_12_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_13_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/hmm_files/hmm_13_0.png -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_13_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_14_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/hmm_files/hmm_14_0.png -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_14_0.text: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_15_0.text: -------------------------------------------------------------------------------- 1 | ['START', 'D', 'N', 'V', 'END'] -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_4_0.text: -------------------------------------------------------------------------------- 1 | D END N START V 2 | D 0.1 0 0.1 0.4 0.4 3 | END 0.0 1 0.2 0.0 0.1 4 | N 0.8 0 0.1 0.3 0.3 5 | V 0.1 0 0.6 0.3 0.2 6 | END START dog in park the walked 7 | D 0 0 0.1 1 0.0 0.8 0 8 | END 1 0 0.0 0 0.0 0.0 0 9 | N 0 0 0.8 0 0.1 0.1 0 10 | START 0 1 0.0 0 0.0 0.0 0 11 | V 0 0 0.1 0 0.9 0.1 1 12 | -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/hmm_files/hmm_8_0.png -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_8_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_9_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/hmm_files/hmm_9_0.png -------------------------------------------------------------------------------- /notebooks/hmm_files/hmm_9_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/hypergraphs.rst: 
-------------------------------------------------------------------------------- 1 | 2 | Simple Hypergraph Example 3 | ========================= 4 | 5 | 6 | .. code:: python 7 | 8 | import pydecode.hyper as ph 9 | import pydecode.display as display 10 | .. code:: python 11 | 12 | hyp = ph.Hypergraph() 13 | with hyp.builder() as b: 14 | n1 = b.add_node(label = "a") 15 | n2 = b.add_node(label = "b") 16 | n3 = b.add_node(label = "c") 17 | n4 = b.add_node(label = "d") 18 | n5 = b.add_node((([n1, n2], "edge1"),), label = "e") 19 | b.add_node([([n5], "edge3"), ([n3, n4], "edge2")], label = "root") 20 | 21 | def build_potentials(label): 22 | return {"edge1" : 3, "edge2" : 1, "edge3" : 1}[label] 23 | potentials = ph.LogViterbiPotentials(hyp).from_vector([build_potentials(edge.label) for edge in hyp.edges]) 24 | Draw the graph 25 | 26 | .. code:: python 27 | 28 | display.HypergraphPotentialFormatter(hyp, potentials).to_ipython() 29 | 30 | 31 | 32 | .. image:: hypergraphs_files/hypergraphs_4_0.png 33 | 34 | 35 | 36 | .. code:: python 37 | 38 | path = ph.best_path(hyp, potentials) 39 | display.HypergraphPathFormatter(hyp, [path]).to_ipython() 40 | 41 | 42 | 43 | .. 
image:: hypergraphs_files/hypergraphs_5_0.png 44 | 45 | 46 | -------------------------------------------------------------------------------- /notebooks/hypergraphs_files/hypergraphs_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/hypergraphs_files/hypergraphs_4_0.png -------------------------------------------------------------------------------- /notebooks/hypergraphs_files/hypergraphs_4_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/hypergraphs_files/hypergraphs_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/hypergraphs_files/hypergraphs_5_0.png -------------------------------------------------------------------------------- /notebooks/hypergraphs_files/hypergraphs_5_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/images/EditDistance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/EditDistance.png -------------------------------------------------------------------------------- /notebooks/images/Fibonacci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/Fibonacci.png -------------------------------------------------------------------------------- /notebooks/images/decipher.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/decipher.png -------------------------------------------------------------------------------- /notebooks/images/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/graph.png -------------------------------------------------------------------------------- /notebooks/images/hmm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/hmm.png -------------------------------------------------------------------------------- /notebooks/images/hmm_30_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/hmm_30_0.png -------------------------------------------------------------------------------- /notebooks/images/parse_hypergraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/parse_hypergraph.png -------------------------------------------------------------------------------- /notebooks/images/parse_hypergraph_no_path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/parse_hypergraph_no_path.png -------------------------------------------------------------------------------- /notebooks/images/parsing.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/parsing.png -------------------------------------------------------------------------------- /notebooks/images/single_edge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/single_edge.png -------------------------------------------------------------------------------- /notebooks/images/trap-left.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/trap-left.png -------------------------------------------------------------------------------- /notebooks/images/trap-right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/trap-right.png -------------------------------------------------------------------------------- /notebooks/images/triangle-left.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/triangle-left.png -------------------------------------------------------------------------------- /notebooks/images/triangle-right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/triangle-right.png -------------------------------------------------------------------------------- /notebooks/images/triangle.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/images/triangle.png -------------------------------------------------------------------------------- /notebooks/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | 1. :doc:`Fibonacci` 5 | 6 | .. image:: images/Fibonacci.png 7 | :width: 500 px 8 | :align: center 9 | 10 | | 11 | | 12 | 13 | 2. :doc:`EditDistance` 14 | 15 | .. image:: images/EditDistance.png 16 | :width: 500 px 17 | :align: center 18 | 19 | | 20 | | 21 | 22 | 3. :doc:`hmm` 23 | 24 | .. image:: images/hmm.png 25 | :width: 500 px 26 | :align: center 27 | 28 | | 29 | | 30 | 31 | 32 | 4. :doc:`parsing` 33 | 34 | .. image:: images/parsing.png 35 | :width: 500 px 36 | :align: center 37 | 38 | | 39 | | 40 | 41 | 5. :doc:`sequence_crf` 42 | 43 | 44 | .. More Examples 45 | .. ============= 46 | 47 | .. `Decipherment`_ 48 | 49 | .. .. image:: images/decipher.png 50 | .. :width: 500 px 51 | .. :align: center 52 | 53 | 54 | .. `Building a Hypergraph`_ 55 | 56 | .. 57 | _Fibonacci : http://nbviewer.ipython.org/github/srush/PyDecode/blob/master/notebooks/Fibonacci.ipynb 58 | _`Edit Distance` : http://nbviewer.ipython.org/github/srush/PyDecode/blob/master/notebooks/EditDistance.ipynb 59 | _`Hidden Markov Model` : http://nbviewer.ipython.org/github/srush/PyDecode/blob/master/notebooks/hmm.ipynb 60 | _`Dependency Parsing` : http://nbviewer.ipython.org/github/srush/PyDecode/blob/master/notebooks/parsing.ipynb 61 | _`Training a Conditional Random Field` : http://nbviewer.ipython.org/github/srush/PyDecode/blob/master/notebooks/PerceptronTagger.ipynb 62 | 63 | 64 | .. _`Decipherment` : http://nbviewer.ipython.org/github/srush/PyDecode/blob/master/notebooks/decipher.ipynb 65 | .. _`Building a Hypergraph` : http://nbviewer.ipython.org/github/srush/PyDecode/blob/master/notebooks/BuildingHypergraph.ipynb 66 | 67 | ..
toctree:: 68 | :maxdepth: 2 69 | :hidden: 70 | 71 | Fibonacci 72 | EditDistance 73 | hmm 74 | parsing 75 | sequence_crf 76 | .. decipher 77 | .. BuildingHypergraph 78 | -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/parsing_files/parsing_10_0.png -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_10_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_11_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/parsing_files/parsing_11_0.png -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_11_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_4_0.text: -------------------------------------------------------------------------------- 1 | -c:7: FutureWarning: Numpy has detected that you (may be) writing to an array returned 2 | by numpy.diagonal or by selecting multiple fields in a record 3 | array. This code will likely break in the next numpy release -- 4 | see numpy.diagonal or arrays.indexing reference docs for details. 5 | The quick fix is to make an explicit copy (e.g., do 6 | arr.diagonal().copy() or arr[['f0','f1']].copy()). 
7 | -c:8: FutureWarning: Numpy has detected that you (may be) writing to an array returned 8 | by numpy.diagonal or by selecting multiple fields in a record 9 | array. This code will likely break in the next numpy release -- 10 | see numpy.diagonal or arrays.indexing reference docs for details. 11 | The quick fix is to make an explicit copy (e.g., do 12 | arr.diagonal().copy() or arr[['f0','f1']].copy()). 13 | -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_5_0.text: -------------------------------------------------------------------------------- 1 | [[2 1] 2 | [3 2] 3 | [0 3]] 4 | -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_6_0.text: -------------------------------------------------------------------------------- 1 | array([[0, 1], 2 | [3, 2], 3 | [0, 3]]) -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/parsing_files/parsing_7_0.png -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_7_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/parsing_files/parsing_8_0.png -------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_8_0.text: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /notebooks/parsing_files/parsing_9_0.text: -------------------------------------------------------------------------------- 1 | array([ -1.00000000e+09, 2.39736353e+00, 2.31316550e+00, 2 | 2.39736353e+00, -1.00000000e+09, -1.00000000e+09, 3 | 2.17027452e+00, 2.11240168e+00, -1.00000000e+09, 4 | 2.33194419e+00, -1.00000000e+09, 2.27204940e+00, 5 | -1.00000000e+09, 2.09714112e+00, 2.39736353e+00, 6 | -1.00000000e+09]) -------------------------------------------------------------------------------- /notebooks/phrase_based.rst: -------------------------------------------------------------------------------- 1 | 2 | .. code:: python 3 | 4 | import pydecode.nlp.phrase_based as pb 5 | import pydecode.chart as chart 6 | import pydecode.test.utils as utils 7 | import pydecode.hyper as ph 8 | .. code:: python 9 | 10 | n = 3 11 | .. code:: python 12 | 13 | c = chart.ChartBuilder(lambda a:a, chart.HypergraphSemiRing, 14 | build_hypergraph = True) 15 | phrases = [pb.Phrase(*p) for p in [((0,2), [0]), ((1,2), [0,1]), ((0,2), [1]), ((1,1), [1])]] 16 | words = range(3) 17 | phrases = pb.make_phrase_table(phrases) 18 | 19 | pb.phrase_lattice(n, phrases, words, c) 20 | lat = c.finish() 21 | .. code:: python 22 | 23 | import pydecode.display as display 24 | display.HypergraphFormatter(lat).to_ipython() 25 | 26 | 27 | 28 | .. image:: phrase_based_files/phrase_based_3_0.png 29 | 30 | 31 | 32 | .. code:: python 33 | 34 | w = utils.random_log_viterbi_potentials(lat) 35 | ins = ph.inside(lat, w) 36 | out = ph.outside(lat, w, ins) 37 | .. code:: python 38 | 39 | groups = [(node.label.num_source if node.label != "END" else n+1) for node in lat.nodes ] 40 | num_groups = max(groups) + 1 41 | limits = [100] * num_groups 42 | ..
code:: python 43 | 44 | def make_constraints(edge): 45 | b = ph.Bitset() 46 | if edge.label is None: 47 | return b 48 | for i in range(edge.label.source_span[0], edge.label.source_span[1]): 49 | b[i] = 1 50 | return b 51 | 52 | constraints = ph.BinaryVectorPotentials(lat)\ 53 | .from_vector([make_constraints(edge) for edge in lat.edges] ) 54 | .. code:: python 55 | 56 | print groups 57 | print num_groups 58 | print limits 59 | 60 | .. parsed-literal:: 61 | 62 | [0, 1, 2, 2, 4] 63 | 5 64 | [100, 100, 100, 100, 100] 65 | 66 | 67 | .. code:: python 68 | 69 | chart = ph.beam_search_BinaryVector(lat, w, constraints, out, -10000, groups, limits) 70 | 71 | :: 72 | 73 | 74 | --------------------------------------------------------------------------- 75 | TypeError Traceback (most recent call last) 76 | 77 | in () 78 | ----> 1 chart = ph.beam_search_BinaryVector(lat, w, constraints, out, -10000, groups, limits) 79 | 80 | 81 | /home/srush/Projects/decoding/python/pydecode/potentials.so in pydecode.potentials.beam_search_BinaryVector (python/pydecode/potentials.cpp:15870)() 82 | 83 | 84 | TypeError: beam_search_BinaryVector() takes exactly 8 positional arguments (7 given) 85 | 86 | 87 | .. code:: python 88 | 89 | for n in lat.nodes: 90 | print chart[n] 91 | 92 | .. 
parsed-literal:: 93 | 94 | [(, 0.0, 2.4306261765868884)] 95 | [(, 0.7574388452061992, 1.673187331380689)] 96 | [(, 0.3133211265699205, 0.901839078839646)] 97 | [(, 0.03291616576722978, 0.7459879956839593)] 98 | [(, 1.2151602054095665, 0.0), (, 0.7789041614511891, 0.0)] 99 | 100 | -------------------------------------------------------------------------------- /notebooks/phrase_based_files/phrase_based_3_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/notebooks/phrase_based_files/phrase_based_3_0.png -------------------------------------------------------------------------------- /notebooks/phrase_based_files/phrase_based_3_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/phrase_based_files/phrase_based_7_0.text: -------------------------------------------------------------------------------- 1 | [0, 1, 2, 2, 4] 2 | 5 3 | [100, 100, 100, 100, 100] 4 | -------------------------------------------------------------------------------- /notebooks/phrase_based_files/phrase_based_9_0.text: -------------------------------------------------------------------------------- 1 | [(, 0.0, 2.4306261765868884)] 2 | [(, 0.7574388452061992, 1.673187331380689)] 3 | [(, 0.3133211265699205, 0.901839078839646)] 4 | [(, 0.03291616576722978, 0.7459879956839593)] 5 | [(, 1.2151602054095665, 0.0), (, 0.7789041614511891, 0.0)] 6 | -------------------------------------------------------------------------------- /notebooks/sequence_crf_files/sequence_crf_10_0.text: -------------------------------------------------------------------------------- 1 | [{Bigram(position=0, prevtag='', tag='N'), 2 | Bigram(position=1, prevtag='N', tag='N'), 3 | Bigram(position=2, prevtag='N', tag='N'), 4 | Bigram(position=3, prevtag='N', tag='N'), 5 | 
Bigram(position=4, prevtag='N', tag='N'), 6 | Bigram(position=5, prevtag='N', tag='')}] -------------------------------------------------------------------------------- /notebooks/sequence_crf_files/sequence_crf_7_0.text: -------------------------------------------------------------------------------- 1 | set(['I', 'V', 'D', 'N']) 2 | -------------------------------------------------------------------------------- /notebooks/sequence_crf_files/sequence_crf_8_0.text: -------------------------------------------------------------------------------- 1 | [Bigram(position=0, prevtag='', tag='D'), Bigram(position=1, prevtag='D', tag='N'), Bigram(position=2, prevtag='N', tag='V'), Bigram(position=3, prevtag='V', tag='')] 2 | -------------------------------------------------------------------------------- /notebooks/sequence_crf_files/sequence_crf_9_0.text: -------------------------------------------------------------------------------- 1 | set(['I', 'V', 'D', 'N']) 2 | iteration 0 3 | avg loss: 0.666667 w: [ 0. 0. 2. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 4 | 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 5 | 1. 0. 1. 1. 1. 1. 1. 0. 0. -2. 2. 0. 0. 0. -2. 2. 0. 0. 6 | 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0.] 7 | effective learning rate: 1.000000 8 | iteration 1 9 | avg loss: 0.000000 w: [ 0. 0. 2. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 10 | 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 11 | 1. 0. 1. 1. 1. 1. 1. 0. 0. -2. 2. 0. 0. 0. -2. 2. 0. 0. 12 | 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0.] 13 | effective learning rate: 1.000000 14 | Loss zero. Stopping. 15 | -------------------------------------------------------------------------------- /optional-requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme>=0.1.6 2 | 3 | # For drawing graphs. 4 | networkx>=1.7 5 | pygraphviz>=1.0 6 | 7 | # For reports. 8 | pandas>=0.11 9 | matplotlib>=1.2 10 | 11 | # For IPython interaction. 
12 | ipython>=1.0 13 | 14 | # For ILP module. 15 | pulp>=1.5 16 | 17 | # To run the tests. 18 | pyzmq>=2.1.11 19 | 20 | breathe>=1.0 21 | cvxopt 22 | scikits-learn 23 | pystruct>=0.1 24 | -------------------------------------------------------------------------------- /python/pydecode/ext/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /python/pydecode/nlp/__init__.py: -------------------------------------------------------------------------------- 1 | import dependency_parsing 2 | import tagging 3 | import cfg as cfg_ 4 | import numpy as np 5 | from collections import Counter, defaultdict 6 | import re 7 | import numpy as np 8 | 9 | 10 | def eisner(sentence_length, order=1): 11 | """ 12 | Implements the dynamic programming algorithm 13 | for projective dependency parsing. 14 | 15 | Parameters 16 | ---------- 17 | sentence_length : int 18 | The length of the sentence. 19 | 20 | order : int 21 | The order of dependency arcs. Currently 22 | implements {1,2} 23 | 24 | Returns 25 | ------- 26 | graph : :py:class:`Hypergraph` 27 | Hypergraph encoding all valid parses. 28 | 29 | index_set : 30 | 31 | """ 32 | n = sentence_length + 1 33 | if order == 1: 34 | return dependency_parsing.eisner_first_order(n) 35 | elif order == 2: 36 | return dependency_parsing.eisner_second_order(n) 37 | 38 | 39 | def tagger(sentence_length, tag_sizes, order=1): 40 | """ 41 | Implements dynamic programming algorithm 42 | for an ngram tagger. 43 | 44 | Parameters 45 | ---------- 46 | sentence_length : int 47 | The length of the sentence. 48 | 49 | order : int 50 | The order of the tagger. Currently 51 | implements {1}. 
52 | 53 | Returns 54 | ------- 55 | graph : :py:class:`Hypergraph` 56 | Hypergraph encoding all 57 | 58 | index_set : 59 | """ 60 | if order == 1: 61 | return tagging.tagger_first_order(sentence_length, tag_sizes) 62 | 63 | def semimarkov(sentence_length): 64 | """ 65 | Implements dynamic programming algorithm 66 | for a semi-markov tagger. 67 | 68 | Parameters 69 | ---------- 70 | sentence_length : int 71 | The length of the sentence. 72 | 73 | 74 | Returns 75 | ------- 76 | graph : :py:class:`Hypergraph` 77 | Hypergraph encoding all 78 | 79 | index_set : 80 | """ 81 | raise NotImplementedError() 82 | 83 | def cfg(sentence_length, grammar_size): 84 | """ 85 | Implements dynamic programming algorithm 86 | for a Chomsky normal form. 87 | 88 | Parameters 89 | ---------- 90 | sentence_length : int 91 | The length of the sentence. 92 | 93 | 94 | Returns 95 | ------- 96 | graph : :py:class:`Hypergraph` 97 | Hypergraph encoding all 98 | 99 | index_set : 100 | """ 101 | return cfg_.cnf_cky(sentence_length, grammar_size) 102 | 103 | 104 | def read_csv_records(f, front=[], back =[], limit=None, length=None): 105 | s = open(f).read() 106 | for i, l in enumerate(re.finditer("(.*?)\n\n", s, re.DOTALL)): 107 | if limit is not None and i > limit: 108 | break 109 | if length is not None and len(l.group(1).split("\n")) > length: 110 | continue 111 | yield np.array(front + [line.split() 112 | for line in l.group(1).split("\n")] + back) 113 | 114 | CONLL = {"INDEX":1, 115 | "WORD":1, 116 | "TAG":3, 117 | "HEAD":6, 118 | "LABEL":7} 119 | 120 | TAG = {"WORD":0, 121 | "TAG":1} 122 | -------------------------------------------------------------------------------- /python/pydecode/nlp/cfg_tests.py: -------------------------------------------------------------------------------- 1 | from pydecode.nlp.cfg import * 2 | import numpy as np 3 | 4 | def test_parsing_encoder(): 5 | encoder = CFGEncoder(10, 1) 6 | parse = encoder.random_structure() 7 | print parse 8 | parts = 
encoder.transform_structure(parse) 9 | test_structure = encoder.from_parts(parts) 10 | print test_structure 11 | assert (test_structure == parse).all() 12 | 13 | def test_parsing_encoder_all(): 14 | encoder = CFGEncoder(3, 2) 15 | for parse in encoder.all_structures(): 16 | parts = encoder.transform_structure(parse) 17 | test_structure = encoder.from_parts(parts) 18 | print parse 19 | print parts 20 | print test_structure 21 | assert (test_structure == parse).all() 22 | -------------------------------------------------------------------------------- /python/pydecode/nlp/decoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pydecode 3 | import pulp 4 | 5 | MAX = float('inf') 6 | MIN = float('-inf') 7 | 8 | class DecodingProblem(object): 9 | """ 10 | Representation of a decoding problem of the form :math:`\max_y f(y)`. 11 | """ 12 | def feasble_set(self): 13 | """ 14 | The set of feasible solutions for the problem. 15 | 16 | """ 17 | pass 18 | 19 | def decode_exhaustive(decoding_problem, scores, coder): 20 | best_score = MIN 21 | best = None 22 | for y in decoding_problem.feasible_set(): 23 | output = coder.transform(y) 24 | indices = np.ravel_multi_index(output.T, coder.shape_) 25 | score = np.sum(scores.ravel()[indices]) 26 | if score > best_score: 27 | best_score = score 28 | best = y 29 | return best 30 | 31 | class HypergraphDecoder(object): 32 | def output_coder(self, problem): 33 | raise NotImplementedError() 34 | 35 | def dynamic_program(self, problem): 36 | raise NotImplementedError() 37 | 38 | def decode(self, decoding_problem, scores): 39 | """ 40 | """ 41 | dp = self.dynamic_program(decoding_problem) 42 | return self.output_coder(decoding_problem).\ 43 | inverse_transform(pydecode.argmax(dp, scores)) 44 | 45 | # TODO: update this 46 | 47 | # class ConstrainedHypergraphDecoder(Decoder): 48 | # def __init__(self, method="ILP"): 49 | # self.method = method 50 | 51 | # def 
hypergraph(self, problem): 52 | # raise NotImplementedError() 53 | 54 | # def special_decode(self): 55 | # raise NotImplementedError() 56 | 57 | # def decode(self, decoding_problem, scorer): 58 | # hypergraph = self.hypergraph(decoding_problem) 59 | # scores = self.potentials(hypergraph, scorer, decoding_problem) 60 | # constraints = self.constraints(hypergraph, decoding_problem) 61 | 62 | 63 | # if self.method == "ILP": 64 | # hyperlp = lp.HypergraphLP.make_lp(hypergraph, 65 | # scores, 66 | # integral=True) 67 | # hyperlp.add_constraints(constraints) 68 | # hyperlp.solve(pulp.solvers.GLPK(mip=1, msg=0)) 69 | # path = hyperlp.path 70 | # else: 71 | # path = self.special_decode(self.method, decoding_problem, 72 | # hypergraph, scores, constraints, scorer) 73 | 74 | # return self.path_to_instance(decoding_problem, path) 75 | -------------------------------------------------------------------------------- /python/pydecode/nlp/decoding_tests.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pydecode.nlp.dependency_parsing as dep 3 | import pydecode.nlp.tagging as tag 4 | 5 | #import pydecode.nlp.permutation as perm 6 | import pydecode.nlp.decoding as decoding 7 | 8 | dependency_instances = [ 9 | (dep.DependencyProblem(i), decoder) 10 | for i in range(2, 6) 11 | for decoder in \ 12 | [(dep.FirstOrderDecoder()), 13 | (dep.SecondOrderDecoder())]] 14 | 15 | 16 | def check_decoding(problem, decoder): 17 | scores = np.random.random(decoder.output_coder(problem).shape_) 18 | optimal = decoding.decode_exhaustive(problem, 19 | scores, 20 | decoder.output_coder(problem)) 21 | hyp_opt = decoder.decode(problem, scores) 22 | print optimal, hyp_opt 23 | 24 | assert(optimal == hyp_opt) 25 | 26 | def test_decoding(): 27 | for problem, decoder in dependency_instances: 28 | yield check_decoding, problem, decoder 29 | 30 | 31 | tagging_instances = [ 32 | (tag.TaggingProblem(i, [1]+([tagset] * (i-2))+[1]), 33 | decoder) 
34 | for i in range(4, 8) 35 | for order, decoder in [(2, tag.BigramTagger())] 36 | for tagset in range(3,5) 37 | ] 38 | 39 | 40 | def test_tagging(): 41 | for problem, decoder in tagging_instances: 42 | yield check_decoding, problem, decoder 43 | 44 | # perm_instances = [ 45 | # (perm.PermutationProblem(i), perm.PermutationScorer, decoder) 46 | # for i in range(3, 7) 47 | # for decoder in [perm.PermutationDecoder("ILP"), 48 | # perm.PermutationDecoder("BEAM"), 49 | # perm.PermutationDecoder("MULTIDFA"), 50 | # perm.PermutationDecoder("BIGDFA"), 51 | # perm.PermutationDecoder("CUBE") 52 | # ] 53 | # ] 54 | 55 | 56 | # def test_permutation(): 57 | # for problem, cls, decoder in perm_instances: 58 | # yield check_decoding, problem, cls, decoder 59 | 60 | if __name__ == "__main__": 61 | for a in test_decoding(): 62 | a[0](*a[1:]) 63 | -------------------------------------------------------------------------------- /python/pydecode/nlp/dependency_parsing_tests.py: -------------------------------------------------------------------------------- 1 | from pydecode.nlp.dependency_parsing import * 2 | import numpy as np 3 | 4 | def test_parses(): 5 | assert not is_spanning(np.array([-1, 2, 3, 1])) 6 | assert not is_spanning(np.array([-1, 1, 1, 1])) 7 | assert is_spanning(np.array([-1, 0, 1, 1])) 8 | 9 | assert not is_projective(np.array([-1, 2, 4, 0, 0])) 10 | 11 | def test_parsing_encoder(): 12 | encoder = DependencyParsingEncoder(10, 1) 13 | parse = encoder.random_structure() 14 | parts = encoder.transform_structure(parse) 15 | test_structure = encoder.from_parts(parts) 16 | assert (test_structure == parse).all() 17 | 18 | def test_parsing_encoder_all(): 19 | encoder = DependencyParsingEncoder(3, 2) 20 | for parse in encoder.all_structures(): 21 | print parse 22 | assert(False) 23 | parts = encoder.transform_structure(parse) 24 | test_structure = encoder.from_parts(parts) 25 | assert (test_structure == parse).all() 26 | 
-------------------------------------------------------------------------------- /python/pydecode/nlp/format.py: -------------------------------------------------------------------------------- 1 | from collections import Counter, defaultdict 2 | 3 | 4 | class Lexicon: 5 | def __init__(self): 6 | pass 7 | 8 | def initialize(self, counts, word_counts, tag_set): 9 | self.counts = counts 10 | self.word_counts = word_counts 11 | self.tag_set = tag_set 12 | self.tag_num = {tag: i for i, tag in enumerate(tag_set)} 13 | self.word_num = {word: i 14 | for i, word in enumerate(word_counts.iterkeys())} 15 | 16 | @staticmethod 17 | def build_lexicon(corpus): 18 | tag_set = set() 19 | word_counts = Counter() 20 | counts = defaultdict(Counter) 21 | for sentence in corpus: 22 | for word in sentence: 23 | counts[word.lex][word.tag] += 1 24 | word_counts[word.lex] += 1 25 | tag_set.add(word.tag) 26 | return Lexicon().initialize(counts, word_counts, tag_set) 27 | 28 | 29 | class Corpus: 30 | def __init__(self, sentences): 31 | self.sentences = sentences 32 | 33 | def __iter__(self): 34 | return iter(self.sentences) 35 | 36 | 37 | class Word: 38 | def __init__(self, lex, ident, tag=None): 39 | self.lex = lex 40 | self.ident = ident 41 | self.tag = tag 42 | 43 | 44 | class Sentence: 45 | def __init__(self, words): 46 | self.words = words 47 | 48 | def __iter__(self): 49 | return iter(self.words) 50 | 51 | def word(self, index): 52 | return self.words[index] 53 | 54 | 55 | class DependencySentence: 56 | def __init__(self, words, dependencies): 57 | self.words = words 58 | self.dependencies = dependencies 59 | 60 | def word(self, index): 61 | return self.words[index] 62 | 63 | def head(self, index): 64 | return self.dependencies[index] 65 | -------------------------------------------------------------------------------- /python/pydecode/nlp/multinomial.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pydecode.hyper as 
ph 3 | from collections import defaultdict 4 | import math 5 | 6 | class Multinomial: 7 | def __init__(self): 8 | self.distribution = None 9 | 10 | def estimate(self, counts): 11 | self.distribution = counts / np.sum(counts) 12 | 13 | class MultinomialTable: 14 | def __init__(self): 15 | self.counts = defaultdict(lambda:defaultdict(lambda:0)) 16 | self.probs = {} 17 | 18 | def reestimate(self): 19 | for key, counts in self.counts.iteritems(): 20 | norm = sum(counts.itervalues()) 21 | self.probs[key] = {k: v / norm for k, v in counts.iteritems() } 22 | self.counts = defaultdict(lambda:defaultdict(lambda:0)) 23 | 24 | def spike(self): 25 | for key, probs in self.probs.iteritems(): 26 | print probs.items() 27 | best, _ = max(probs.items(), key=lambda a: a[1]) 28 | self.probs[key] = defaultdict(lambda:0.0) 29 | self.probs[key][best] = 1.0 30 | 31 | 32 | 33 | def inc(self, position, val): 34 | self.counts[position[0]][position[1]] += val 35 | 36 | def to_array(self, hypergraph, label_map): 37 | arr = np.zeros([len(hypergraph.edges)]) 38 | for i, edge in enumerate(hypergraph.edges): 39 | pos = label_map(edge.head.label) 40 | arr[i] = self.probs[pos[0]][pos[1]] 41 | return arr 42 | 43 | def show(self): 44 | for outer, dist in self.probs.iteritems(): 45 | for inner, val in dist.iteritems(): 46 | print outer, inner, val 47 | print 48 | 49 | 50 | 51 | def em(distribution_table, label_map, hypergraph, base=None, 52 | epochs=10): 53 | base_potentials = base 54 | if base is None: 55 | base_potentials = np.zeros([len(hypergraph)]) 56 | 57 | ll = [] 58 | for i in range(epochs): 59 | print "epoch:", i 60 | potentials = ph.LogProbPotentials(hypergraph).from_array( 61 | base_potentials + np.log(distribution_table.to_array(hypergraph, label_map))) 62 | 63 | print "start" 64 | margs = ph.compute_marginals(hypergraph, potentials) 65 | print "stop" 66 | for node in hypergraph.nodes: 67 | distribution_table.inc( 68 | label_map(node.label), 69 | math.exp(margs[node] - 
margs[hypergraph.root])) 70 | distribution_table.reestimate() 71 | print margs[hypergraph.root] 72 | ll.append(margs[hypergraph.root]) 73 | return ll 74 | -------------------------------------------------------------------------------- /python/pydecode/nlp/phrase_based.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple, defaultdict 2 | import itertools 3 | 4 | 5 | class PhraseTranslation: 6 | def __init__(self): 7 | pass 8 | 9 | 10 | class PhraseScore: 11 | def __init__(self, phrase_score): 12 | pass 13 | def score(): 14 | pass 15 | 16 | class Phrase(namedtuple("Phrase", ["source_span", "target_words"])): 17 | @property 18 | def last(self): 19 | return self.target_words[-1] 20 | 21 | @property 22 | def src_len(self): 23 | return self.source_span[1] - self.source_span[0] 24 | 25 | def __str__(self): 26 | return "(%d, %d) %s"%(self.source_span[0], self.source_span[1], 27 | self.target_words) 28 | 29 | class State(namedtuple("State", ["num_source", "last"])): 30 | def __str__(self): 31 | return "(%d, %d)"%(self.num_source, self.last) 32 | 33 | def make_phrase_table(phrases): 34 | d = defaultdict(list) 35 | for phrase in phrases: 36 | d[phrase.last].append(phrase) 37 | return d 38 | 39 | def phrase_lattice(n, phrase_table, words, c): 40 | c.init(State(0, 0)) 41 | for i in range(1, n): 42 | for last in range(n): 43 | c[State(i, last)] = \ 44 | c.sum([c[key] * c.sr(phrase) 45 | for phrase in phrase_table[last] 46 | for w in words 47 | for num in [i - phrase.src_len] 48 | if num >= 0 49 | for key in [State(num, w)] 50 | if key in c]) 51 | c["END"] = \ 52 | c.sum([c[key] 53 | for w in words 54 | for key in [State(n-1, w)] 55 | if key in c 56 | ]) 57 | -------------------------------------------------------------------------------- /python/pydecode/nlp/tagging.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes for sequence tagging/labeling problem. 
3 | """ 4 | import pydecode 5 | import itertools 6 | import numpy as np 7 | from pydecode.encoder import StructuredEncoder 8 | 9 | 10 | def tagger_first_order(sentence_length, tag_sizes): 11 | n = sentence_length 12 | K = tag_sizes 13 | t = np.max(tag_sizes) 14 | 15 | coder = np.arange(n * t, dtype=np.int64)\ 16 | .reshape([n, t]) 17 | part_encoder = TaggingEncoder(tag_sizes, 1) 18 | out = part_encoder.encoder 19 | 20 | c = pydecode.ChartBuilder(coder, out, 21 | unstrict=True, 22 | lattice=True) 23 | 24 | c.init(coder[0, :K[0]]) 25 | for i in range(1, sentence_length): 26 | for t in range(K[i]): 27 | c.set_t(coder[i, t], 28 | coder[i-1, :K[i-1]], 29 | labels=out[i, :K[i-1], t]) 30 | 31 | return c.finish(False), part_encoder 32 | 33 | class TaggingEncoder(StructuredEncoder): 34 | def __init__(self, tag_sizes, order=1): 35 | self.tag_sizes = tag_sizes 36 | self.size = len(self.tag_sizes) 37 | self.order = order 38 | n = len(tag_sizes) 39 | t = np.max(tag_sizes) 40 | shape = (n, t, t) 41 | super(TaggingEncoder, self).__init__(shape) 42 | 43 | def transform_structure(self, tagging): 44 | if self.order == 1: 45 | return np.array([np.append([i], tagging[i-self.order:i+1]) 46 | for i in range(self.order, len(tagging))]) 47 | 48 | def from_parts(self, parts): 49 | sequence = np.zeros(self.size, dtype=np.int32) 50 | for (i, pt, t) in parts: 51 | sequence[i] = t 52 | sequence[i-1] = pt 53 | return sequence 54 | 55 | def all_structures(self): 56 | """ 57 | Generate all valid tag sequences for a tagging problem. 
58 | """ 59 | for seq in itertools.product(*map(range, self.tag_sizes)): 60 | yield np.array(seq) 61 | 62 | def random_structure(self): 63 | sequence = np.zeros(len(self.tag_sizes)) 64 | for i, size in enumerate(self.tag_sizes): 65 | sequence[i] = np.random.randint(size) 66 | return sequence 67 | -------------------------------------------------------------------------------- /python/pydecode/nlp/tagging_tests.py: -------------------------------------------------------------------------------- 1 | from pydecode.nlp.tagging import * 2 | import numpy as np 3 | 4 | def test_tagging(): 5 | encoder = TaggingEncoder([5] * 10, 1) 6 | sequence = encoder.random_structure() 7 | parts = encoder.transform_structure(sequence) 8 | test_structure = encoder.from_parts(parts) 9 | assert (test_structure == sequence).all() 10 | -------------------------------------------------------------------------------- /python/pydecode/potentials.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srush/PyDecode/7f64618663575a760e5c514b2cb771b8a4ca3131/python/pydecode/potentials.so -------------------------------------------------------------------------------- /python/pydecode/templates/beam.pxd.tpl: -------------------------------------------------------------------------------- 1 | # Cython template hack. 
2 | from cython.operator cimport dereference as deref 3 | from libcpp cimport bool 4 | from libcpp.vector cimport vector 5 | 6 | 7 | cdef extern from "" namespace "std": 8 | cdef cppclass cbitset "bitset<500>": 9 | void set(int, int) 10 | bool& operator[](int) 11 | 12 | cdef class Bitset: 13 | cdef cbitset data 14 | cdef init(self, cbitset data) 15 | 16 | cdef extern from "Hypergraph/BeamSearch.hh": 17 | 18 | cdef cppclass CBeamGroups "BeamGroups": 19 | CBeamGroups(const CHypergraph *graph, 20 | const vector[int] groups, 21 | const vector[int] group_limit, 22 | int num_groups) 23 | 24 | {% for S in semirings %} 25 | 26 | cdef extern from "Hypergraph/BeamSearch.hh" namespace "BeamChart<{{S.type}}>": 27 | cdef cppclass CBeamHyp{{S.type}} "BeamChart<{{S.type}}>::BeamHyp": 28 | {{S.cvalue}} sig 29 | double current_score 30 | double future_score 31 | 32 | CBeamChart{{S.type}} *cbeam_search{{S.type}} "BeamChart<{{S.type}}>::beam_search" ( 33 | const CHypergraph *graph, 34 | const double *potentials, 35 | const {{S.cvalue}} *constraints, 36 | const double *outside, 37 | double lower_bound, 38 | const CBeamGroups &groups, 39 | bool recombine) except + 40 | 41 | CBeamChart{{S.type}} *ccube_pruning{{S.type}} "BeamChart<{{S.type}}>::cube_pruning" ( 42 | const CHypergraph *graph, 43 | const double *potentials, 44 | const {{S.cvalue}} *constraints, 45 | const double *outside, 46 | double lower_bound, 47 | const CBeamGroups &groups, 48 | bool recombine) except + 49 | 50 | 51 | cdef extern from "Hypergraph/BeamSearch.hh": 52 | cdef cppclass CBeamChart{{S.type}} "BeamChart<{{S.type}}>": 53 | CHyperpath *get_path(int result) 54 | vector[CBeamHyp{{S.type}} *] get_beam(int) 55 | bool exact 56 | 57 | cdef class BeamChart{{S.type}}: 58 | cdef CBeamChart{{S.type}} *thisptr 59 | cdef Hypergraph graph 60 | 61 | cdef init(self, CBeamChart{{S.type}} *chart, Hypergraph graph) 62 | 63 | {% endfor %} 64 | -------------------------------------------------------------------------------- 
/python/pydecode/templates/beam.yaml: -------------------------------------------------------------------------------- 1 | semirings: 2 | # - type: BinaryVectorPotential 3 | # from: BinaryVector 4 | # cvalue: cbitset 5 | # - type: AlphabetPotential 6 | # from: Alphabet 7 | # cvalue: vector[int] 8 | - type: LogViterbi 9 | from: LogViterbi 10 | cvalue: float -------------------------------------------------------------------------------- /python/pydecode/templates/chart.pxd: -------------------------------------------------------------------------------- 1 | cimport numpy as np 2 | cimport cython 3 | 4 | cdef class ChartBuilder: 5 | cdef _init_buffer(self, long [:] indices) 6 | cdef _init_list(self, indices) 7 | 8 | cdef _set_transpose(self, 9 | long index, 10 | long [:] tails1, 11 | long [:] tails2=*, 12 | long [:] tails3=*, 13 | long [:] out=*) 14 | cdef _set_list(self, long index, tuples, out=*) 15 | cdef _finish_node(self, long index, result) 16 | 17 | cdef CHypergraph *_hg_ptr 18 | cdef CHypergraphBuilder *_builder 19 | cdef vector[int] *_chart 20 | 21 | cdef bool _done 22 | cdef int _last 23 | cdef set[long] _no_tail 24 | cdef bool _strict 25 | cdef int _max_arity 26 | 27 | cdef int _size 28 | cdef items 29 | cdef outputs 30 | 31 | cdef _ndata 32 | cdef _nindices 33 | cdef _nind 34 | 35 | 36 | cdef int _output_size 37 | cdef bool _construct_output 38 | cdef _data 39 | cdef _indices 40 | cdef _ind 41 | 42 | 43 | cdef np.ndarray _edges1 44 | cdef np.ndarray _edges2 45 | cdef np.ndarray _out 46 | 47 | cdef _lattice 48 | -------------------------------------------------------------------------------- /python/pydecode/templates/cython_jinja.py: -------------------------------------------------------------------------------- 1 | from jinja2 import Environment, FileSystemLoader 2 | import yaml 3 | import sys 4 | 5 | fast_mode = False 6 | if len(sys.argv) > 1 and sys.argv[1] == "fast": 7 | fast_mode = True 8 | 9 | env = 
Environment(loader=FileSystemLoader('python/pydecode/templates')) 10 | 11 | vars = yaml.safe_load(open("python/pydecode/templates/weights.yaml")) 12 | vars_beam = yaml.safe_load(open("python/pydecode/templates/beam.yaml")) 13 | 14 | if fast_mode: 15 | vars_beam = {"semirings":[]} 16 | vars["semirings"] = vars["semirings"][:4] 17 | for var in vars["semirings"][:4]: 18 | var["ctype"] = var["type"] 19 | 20 | else: 21 | for var in vars["semirings"]: 22 | var["ctype"] = var["type"] 23 | 24 | 25 | 26 | 27 | template = env.get_template('algorithms.pyx.tpl') 28 | template_beam = env.get_template('beam.pyx.tpl') 29 | out = open("python/pydecode/_pydecode.pyx", "w") 30 | print >>out, open("python/pydecode/templates/libhypergraph.pyx").read() 31 | # print >>out, open("python/pydecode/templates/extensions.pyx").read() 32 | print >>out, open("python/pydecode/templates/chart.pyx").read() 33 | print >>out, template_beam.render(vars_beam) 34 | print >>out, template.render(vars) 35 | 36 | template = env.get_template('algorithms.pxd.tpl') 37 | template_beam = env.get_template('beam.pxd.tpl') 38 | out = open("python/pydecode/_pydecode.pxd", "w") 39 | print >>out, open("python/pydecode/templates/libhypergraph.pxd").read() 40 | print >>out, open("python/pydecode/templates/chart.pxd").read() 41 | # print >>out, open("python/pydecode/templates/extensions.pxd").read() 42 | print >>out, template_beam.render(vars_beam) 43 | print >>out, template.render(vars) 44 | -------------------------------------------------------------------------------- /python/pydecode/templates/extensions.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "Hypergraph/Automaton.hh": 2 | cdef cppclass CDFA "DFA": 3 | CDFA(int num_states, int num_symbols, 4 | const vector[map[int, int] ] &transition, 5 | const set[int] &final) 6 | bool final(int state) 7 | int transition(int state, int symbol) 8 | int valid_transition(int state, int symbol) 9 | 10 | 11 | cdef extern from
"Hypergraph/Algorithms.hh": 12 | CHypergraph *cmake_lattice "make_lattice"( 13 | int width, int height, 14 | const vector[vector[int] ] transitions, 15 | vector[CLatticeLabel ] *labels) except + 16 | 17 | cdef cppclass CLatticeLabel "LatticeLabel": 18 | int i 19 | int j 20 | 21 | cdef cppclass CDFALabel "DFANode": 22 | int left_state 23 | int right_state 24 | 25 | 26 | cdef class DFALabel: 27 | cdef CDFALabel label 28 | cdef _core 29 | cdef init(DFALabel self, CDFALabel label, core) 30 | 31 | cdef class DFA: 32 | cdef const CDFA *thisptr 33 | 34 | 35 | cdef class LatticeLabel: 36 | cdef CLatticeLabel label 37 | cdef init(LatticeLabel self, CLatticeLabel label) 38 | -------------------------------------------------------------------------------- /python/pydecode/templates/weights.yaml: -------------------------------------------------------------------------------- 1 | semirings: 2 | - type: Viterbi 3 | cvalue: double 4 | npvalue: np.double 5 | viterbi: Yes 6 | description: 7 | Real-valued max probability potentials. 8 | Uses the operations :math:`(+, *) = (\max, *)`. 9 | - type: LogViterbi 10 | cvalue: double 11 | npvalue: np.double 12 | viterbi: True 13 | description: 14 | Real-valued max log-probability potentials. 15 | 16 | Uses the operations :math:`(+, *) = (\max, +)`. 17 | - type: Real 18 | cvalue: double 19 | npvalue: np.double 20 | viterbi: False 21 | description: 22 | Real-valued inside log-probability potentials. 23 | - type: Log 24 | cvalue: double 25 | npvalue: np.double 26 | viterbi: False 27 | description: 28 | Real-valued inside log-probability potentials. 29 | 30 | - type: Boolean 31 | npvalue: np.int8 32 | cvalue: char 33 | viterbi: Yes 34 | description: 35 | Boolean-valued logical potentials. 36 | 37 | Uses the operations :math:`(+, *) = (\land, \lor)`. 38 | - type: Counting 39 | cvalue: int 40 | npvalue: np.int32 41 | viterbi: False 42 | description: 43 | Natural-valued counting potentials.
44 | 45 | Uses the operations :math:`(+, *) = (+, *)`. 46 | - type: MinMax 47 | cvalue: double 48 | npvalue: np.double 49 | viterbi: No 50 | description: 51 | Real-valued min value potentials. 52 | 53 | Uses the operations :math:`(+, *) = (\min, \max)`. 54 | 55 | # - type: Inside 56 | # cvalue: double 57 | # npvalue: np.double 58 | # format: d 59 | # viterbi: Yes 60 | # description: 61 | # Real-valued probability potentials. 62 | 63 | # Uses the operations :math:`(+, *) = (+, *)`. 64 | 65 | 66 | # - type: Alphabet 67 | # ctype: AlphabetPotential 68 | # cvalue: vector[int] 69 | # viterbi: No 70 | # description: 71 | # Alphabet valued weights. 72 | 73 | 74 | 75 | 76 | # - type: BinaryVector 77 | # cvalue: cbitset 78 | # pvalue: Bitset 79 | # to_cpp: val.data 80 | # from_cpp: Bitset().init(val) 81 | # viterbi: No 82 | # description: 83 | # Binary vector potentials. 84 | 85 | # - type: MinSparseVector 86 | # cvalue: vector[pair[int, int]] 87 | # viterbi : False 88 | 89 | # - type: MaxSparseVector 90 | # cvalue: vector[pair[int, int]] 91 | # viterbi : No 92 | 93 | # - type: SparseVector 94 | # ctype: SparseVectorPotential 95 | # cvalue: vector[pair[int, int]] 96 | # viterbi: No 97 | # description: 98 | # Sparse-vector valued weights. 99 | -------------------------------------------------------------------------------- /python/pydecode/test/chart_tests.py: -------------------------------------------------------------------------------- 1 | 2 | import pydecode 3 | import random 4 | 5 | # class SizedTupleHasher: 6 | # """ 7 | # For hashing chart items to integers. 
8 | # """ 9 | # def __init__(self, sizes): 10 | # self._sizes = sizes 11 | # multi = 1 12 | # self._multipliers = [] 13 | # for s in sizes: 14 | # self._multipliers.append(multi) 15 | # multi *= s 16 | 17 | # self._max_size = multi 18 | 19 | # def __call__(self, tupl): 20 | # val = 0 21 | # multiplier = 1 22 | # for i in range(len(tupl)): 23 | # val += self._multipliers[i] * tupl[i] 24 | # return val 25 | 26 | # def max_size(self): 27 | # return self._max_size 28 | 29 | # def unhash(self, val): 30 | # tupl = () 31 | # v = val 32 | # for i in range(len(self._multipliers)-1, -1, -1): 33 | # tupl = (v / self._multipliers[i],) + tupl 34 | # v = v % self._multipliers[i] 35 | # return tupl 36 | 37 | 38 | # def test_sized_hasher(): 39 | # a = random.randint(1, 40) 40 | # b = random.randint(1, 40) 41 | # c = random.randint(1, 40) 42 | # hasher = ph.SizedTupleHasher([a,b,c]) 43 | # #hasher = SizedTupleHasher([a,b,c]) 44 | # d = set() 45 | # for i in range(a): 46 | # for j in range(b): 47 | # for k in range(c): 48 | # v = hasher.hasher((i,j,k)) 49 | # print (i,j,k), v 50 | # assert(v not in d) 51 | # d.add(v) 52 | -------------------------------------------------------------------------------- /python/pydecode/test/hypergraph_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for hypergraph construction and basic data structures. 3 | """ 4 | 5 | import pydecode 6 | import pydecode.test.utils as utils 7 | import nose.tools as nt 8 | from pydecode.test.utils import hypergraphs 9 | 10 | 11 | def test_main(): 12 | for graph in hypergraphs(): 13 | # yield check_all_valid, graph 14 | yield check_numbering, graph 15 | yield check_hypergraph, graph 16 | assert utils.check_fully_connected(graph) 17 | 18 | # def check_all_valid(graph): 19 | # for path in utils.all_paths(graph): 20 | # utils.valid_path(graph, path) 21 | 22 | 23 | def check_numbering(graph): 24 | """ 25 | Check that the numbering nodes and edges is correct. 
26 | """ 27 | for i, node in enumerate(graph.nodes): 28 | nt.assert_equal(node.id, i) 29 | for i, edge in enumerate(graph.edges): 30 | nt.assert_equal(edge.id, i) 31 | 32 | 33 | def check_hypergraph(graph): 34 | """ 35 | Check the assumptions about the hypergraph. 36 | """ 37 | 38 | terminal = True 39 | children = set() 40 | 41 | # Check that terminal nodes are first. 42 | print len(graph.nodes) 43 | print len(graph.edges) 44 | for node in graph.nodes: 45 | print node.id 46 | 47 | for node in graph.nodes: 48 | if not terminal and len(node.edges) == 0: 49 | assert False 50 | if len(node.edges) != 0: 51 | terminal = False 52 | 53 | # Check ordering. 54 | for edge in node.edges: 55 | for tail_node in edge.tail: 56 | nt.assert_less(tail_node.id, node.id) 57 | children.add(tail_node.id) 58 | 59 | # Only 1 root. 60 | nt.assert_equal(len(children), len(graph.nodes) - 1) 61 | 62 | 63 | # def check_serialization(graph): 64 | # s = pydecode.io.hypergraph_to_json(graph) 65 | # hyper2 = pydecode.io.json_to_hypergraph(s) 66 | # nt.assert_equal(len(graph.edges), len(hyper2.edges)) 67 | # nt.assert_equal(len(graph.nodes), len(hyper2.nodes)) 68 | 69 | 70 | @nt.raises(Exception) 71 | def test_diff_potentials_fail(): 72 | h1, w1 = utils.random_hypergraph() 73 | h2, w2 = utils.random_hypergraph() 74 | pydecode.best_path(h1, w2) 75 | 76 | 77 | @nt.raises(Exception) 78 | def test_outside_fail(): 79 | h1, w1 = utils.random_hypergraph() 80 | h2, w2 = utils.random_hypergraph() 81 | pydecode.outside_path(h1, w2) 82 | 83 | 84 | @nt.raises(Exception) 85 | def test_builder(): 86 | h = pydecode.Hypergraph() 87 | b = h.builder() 88 | b.add_node([]) 89 | 90 | 91 | @nt.raises(Exception) 92 | def test_bad_edge(): 93 | h = pydecode.Hypergraph() 94 | with h.builder() as b: 95 | n1 = b.add_node() 96 | b.add_node(([n1],)) 97 | 98 | if __name__ == "__main__": 99 | for a in test_main(): 100 | print a[0] 101 | a[0](*a[1:]) 102 | 
-------------------------------------------------------------------------------- /python/pydecode/test/transform_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for hypergraph mapping and pruning. 3 | """ 4 | 5 | import pydecode 6 | import pydecode.test.utils as utils 7 | import nose.tools as nt 8 | import numpy as np 9 | import numpy.random 10 | import numpy.testing 11 | 12 | def test_pruning(): 13 | for h in utils.hypergraphs(): 14 | 15 | w = numpy.random.random(len(h.edges)) 16 | 17 | original_path = pydecode.best_path(h, w) 18 | marginals = pydecode.marginals(h, w) 19 | best = w.T * original_path.v 20 | print marginals[1] 21 | a = np.array(marginals > 0.99* best, dtype=np.uint8) 22 | 23 | # new_hyper = pydecode.filter(h, a) 24 | # # print project.shape, w.shape 25 | # # print project.todense() 26 | # # new_potentials = project * w 27 | # prune_path = pydecode.best_path(new_hyper, new_potentials) 28 | # assert len(original_path.edges) > 0 29 | 30 | 31 | # # print "proect ", project.shape 32 | # # print project * prune_path.v, (project * prune_path.v).shape 33 | # # print original_path.v, original_path.v.shape 34 | # numpy.testing.assert_equal( 35 | # prune_path.v.todense(), 36 | # (project * original_path.v).todense()) 37 | 38 | # # nt.assert_almost_equal( 39 | # best, 40 | # new_potentials.T * prune_path.v) 41 | 42 | # Test pruning amount. 
43 | # prune = 0.001 44 | # max_marginals = ph.marginals(h, w) 45 | # prune_projection = ph.prune(h, w, prune) 46 | 47 | # new_hyper = prune_projection.small_hypergraph 48 | # new_potentials = w.project(h, prune_projection) 49 | 50 | # assert (len(new_hyper.edges) > 0) 51 | # original_edges = {} 52 | # for edge in h.edges: 53 | # original_edges[edge.label] = edge 54 | 55 | # new_edges = {} 56 | # for edge in new_hyper.edges: 57 | # new_edges[edge.label] = edge 58 | 59 | # for name, edge in new_edges.iteritems(): 60 | # orig = original_edges[name] 61 | # nt.assert_almost_equal(w[orig], new_potentials[edge]) 62 | # m = max_marginals[orig] 63 | # nt.assert_greater(m, prune) 64 | 65 | if __name__ == "__main__": 66 | test_pruning() 67 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.7 2 | scipy 3 | nose>1.0 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.rst 3 | 4 | [files] 5 | resources = 6 | notebooks = {datadir}/notebooks/ 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | import os.path 4 | import sys 5 | import numpy as np 6 | 7 | def check_for_cython(): 8 | return True 9 | 10 | class ExtensionWrapper: 11 | def __init__(self, debug=False, cython=False): 12 | self.debug = debug 13 | self.cython = cython 14 | 15 | def make(self, ext_name, pyx_name, cpp_names, 16 | extra_objects=[]): 17 | return Extension(ext_name, 18 | [pyx_name] + cpp_names 19 | if self.cython 20 | else [pyx_name.split(".")[0] + "." 
+ "cpp"] + cpp_names, 21 | language='c++', 22 | extra_compile_args=["-O0"] if self.debug else [], 23 | include_dirs=[r'src/', "."]) 24 | 25 | def cmdclass(self): 26 | if self.cython: 27 | from Cython.Distutils import build_ext 28 | from Cython.Build import cythonize 29 | return {'build_ext': build_ext} 30 | return {} 31 | 32 | 33 | def make_extension(wrapper): 34 | a = [wrapper.make("pydecode._pydecode", 35 | "python/pydecode/_pydecode.pyx", 36 | ["src/Hypergraph/Hypergraph.cpp", 37 | "src/Hypergraph/Semirings.cpp", 38 | "src/Hypergraph/SemiringAlgorithms.cpp", 39 | "src/Hypergraph/Algorithms.cpp", 40 | "src/Hypergraph/BeamSearch.cpp" 41 | ])] 42 | return a 43 | 44 | def main(): 45 | copy_args = sys.argv[1:] 46 | has_cython = check_for_cython() 47 | if '--cython' not in copy_args: 48 | has_cython = False 49 | if '--cython' in copy_args: 50 | copy_args.remove("--cython") 51 | debug = False 52 | if '--debug' in copy_args: 53 | debug = True 54 | copy_args.remove("--debug") 55 | 56 | print sys.argv 57 | print "done" 58 | wrapper = ExtensionWrapper(cython=has_cython, debug=debug) 59 | 60 | setup( 61 | name = 'pydecode', 62 | cmdclass = wrapper.cmdclass(), 63 | packages=['pydecode', 'pydecode.nlp', 'pydecode.test'], 64 | package_dir={'pydecode': 'python/pydecode'}, 65 | ext_modules = make_extension(wrapper), 66 | requires=["numpy"], 67 | version = '0.2.11', 68 | description = 'A dynamic programming toolkit', 69 | author = 'Alexander Rush', 70 | author_email = 'srush@csail.mit.edu', 71 | url = 'https://github.com/srush/pydecode/', 72 | download_url = 'https://github.com/srush/PyDecode/tarball/master', 73 | keywords = ['nlp'], 74 | classifiers = [], 75 | script_args = copy_args, 76 | include_dirs = [np.get_include()], 77 | data_files=[('pydecode/notebooks', ['notebooks/hmm.ipynb', 78 | "notebooks/Fibonacci.ipynb", 79 | "notebooks/EditDistance.ipynb", 80 | "notebooks/parsing.ipynb"] 81 | )] 82 | ) 83 | 84 | if __name__ == "__main__": 85 | main() 86 | 
-------------------------------------------------------------------------------- /src/Hypergraph/Algorithms.hh: -------------------------------------------------------------------------------- 1 | // Copyright [2013] Alexander Rush 2 | 3 | #ifndef HYPERGRAPH_ALGORITHMS_H_ 4 | #define HYPERGRAPH_ALGORITHMS_H_ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "./common.h" 11 | 12 | #include "Hypergraph/Automaton.hh" 13 | #include "Hypergraph/Hypergraph.hh" 14 | #include "Hypergraph/Semirings.hh" 15 | #include "Hypergraph/SemiringAlgorithms.hh" 16 | 17 | Hypergraph *filter(const Hypergraph *graph, 18 | const bool *edge_mask); 19 | 20 | Hypergraph *binarize(const Hypergraph *graph); 21 | 22 | Hypergraph *intersect(Hypergraph *graph, 23 | const int *labels, 24 | const DFA &dfa); 25 | 26 | Hypergraph *intersect_count(Hypergraph *graph, 27 | const int *edge_counts, 28 | int lower_limit, 29 | int upper_limit, 30 | int goal); 31 | 32 | 33 | /* Chart *edge_domination(const Hypergraph &graph); */ 34 | /* Chart *node_domination(const Hypergraph &graph); */ 35 | // vector > *children_nodes(const Hypergraph &graph); 36 | 37 | // vector > *children_sparse( 38 | // const Hypergraph *graph, 39 | // const HypergraphPotentials &potentials); 40 | 41 | 42 | // set *updated_nodes( 43 | // const Hypergraph *graph, 44 | // const vector > &children, 45 | // const set &updated); 46 | 47 | 48 | // struct LatticeLabel { 49 | // LatticeLabel() {} 50 | // LatticeLabel(int i_, int j_) : i(i_), j(j_) {} 51 | // int i, j; 52 | // }; 53 | 54 | // Hypergraph *make_lattice(int width, int height, 55 | // const vector > &transitions, 56 | // vector *labels); 57 | 58 | 59 | 60 | 61 | #endif // HYPERGRAPH_ALGORITHMS_H_ 62 | -------------------------------------------------------------------------------- /src/Hypergraph/Automaton.hh: -------------------------------------------------------------------------------- 1 | // Copyright [2014] Alexander Rush 2 | 3 | #ifndef HYPERGRAPH_AUTOMATON_H_ 4 | 
#define HYPERGRAPH_AUTOMATON_H_ 5 | 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | class DFA { 12 | public: 13 | DFA(uint num_states, uint num_symbols, 14 | const vector<map<int, int> > &transition, 15 | const set<int> &final) 16 | : states_(num_states), 17 | transition_(transition), 18 | final_(final) { 19 | assert(transition.size() == num_states); 20 | assert(transition[0].size() == num_symbols); 21 | for (uint i = 0; i < num_states; ++i) { 22 | states_[i] = i; 23 | } 24 | } 25 | 26 | const vector<int> &states() const { 27 | return states_; 28 | } 29 | 30 | int transition(int state, int symbol) const { 31 | return transition_[state].at(symbol); 32 | } 33 | 34 | int valid_transition(int state, int symbol) const { 35 | return transition_[state].find(symbol) != transition_[state].end(); 36 | } 37 | 38 | int final(int state) const { 39 | return final_.find(state) != final_.end(); 40 | } 41 | 42 | 43 | private: 44 | 45 | vector<int> states_; 46 | const vector<map<int, int> > transition_; 47 | const set<int> final_; 48 | }; 49 | 50 | 51 | #endif // HYPERGRAPH_AUTOMATON_H_ 52 | -------------------------------------------------------------------------------- /src/Hypergraph/SemiringAlgorithms.hh: -------------------------------------------------------------------------------- 1 | // Copyright [2013] Alexander Rush 2 | 3 | #ifndef HYPERGRAPH_SEMIRINGALGORITHMS_H_ 4 | #define HYPERGRAPH_SEMIRINGALGORITHMS_H_ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "./common.h" 11 | 12 | #include "Hypergraph/Hypergraph.hh" 13 | #include "Hypergraph/Semirings.hh" 14 | 15 | template <typename SemiringType> 16 | void general_inside( 17 | const Hypergraph *graph, 18 | const typename SemiringType::ValType *weights, 19 | typename SemiringType::ValType *chart); 20 | 21 | template <typename SemiringType> 22 | void general_outside( 23 | const Hypergraph *graph, 24 | const typename SemiringType::ValType *weights, 25 | const typename SemiringType::ValType *inside_chart, 26 | typename SemiringType::ValType *chart); 27 | 28 | template <typename SemiringType> 29 | void
general_viterbi( 30 | const Hypergraph *graph, 31 | const typename SemiringType::ValType *potentials, 32 | typename SemiringType::ValType *chart, 33 | int *back_pointers, 34 | bool *mask); 35 | 36 | template <typename SemiringType> 37 | void transform(const Hypergraph *graph, 38 | const typename SemiringType::ValType *weights, 39 | const int *labeling, 40 | typename SemiringType::ValType *label_weights, 41 | int label_size); 42 | 43 | template <typename SemiringType> 44 | void general_kbest( 45 | const Hypergraph *graph, 46 | const typename SemiringType::ValType *weights, 47 | int K, 48 | vector *); 49 | 50 | template <typename S> 51 | void node_marginals(const Hypergraph *hypergraph, 52 | const typename S::ValType *in_chart, 53 | const typename S::ValType *out_chart, 54 | typename S::ValType *node_marginals); 55 | 56 | template <typename S> 57 | void edge_marginals(const Hypergraph *hypergraph, 58 | const typename S::ValType *weights, 59 | const typename S::ValType *in_chart, 60 | const typename S::ValType *out_chart, 61 | typename S::ValType *edge_marginals); 62 | 63 | #endif // HYPERGRAPH_SEMIRINGALGORITHMS_H_ 64 | -------------------------------------------------------------------------------- /src/Hypergraph/Semirings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright [2013] Alexander Rush 2 | 3 | #include "Hypergraph/Semirings.hh" 4 | -------------------------------------------------------------------------------- /src/README.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | Decoding Library 3 | ================ 4 | 5 | C++ library code underlying the python modules.
6 | -------------------------------------------------------------------------------- /src/SConscript: -------------------------------------------------------------------------------- 1 | Import('env') 2 | 3 | sources = [ 4 | "Hypergraph/Hypergraph.cpp", 5 | "Hypergraph/SemiringAlgorithms.cpp", 6 | "Hypergraph/Algorithms.cpp", 7 | "Hypergraph/Semirings.cpp", 8 | "Hypergraph/BeamSearch.cpp", 9 | ] 10 | 11 | hyp_lib = env.Library('decoding', sources) 12 | 13 | Return('hyp_lib') 14 | -------------------------------------------------------------------------------- /src/Tests.cpp: -------------------------------------------------------------------------------- 1 | //test.cpp 2 | #include 3 | #include 4 | #include "Hypergraph/Hypergraph.hh" 5 | #include "Hypergraph/Semirings.hh" 6 | #include "./common.h" 7 | 8 | #ifndef NUM_LOOPS 9 | #define NUM_LOOPS 10000 10 | #endif 11 | 12 | #ifndef SEMIRINGTEST 13 | #define SEMIRINGTEST(TYPE) \ 14 | do { \ 15 | TYPE::ValType a = TYPE::randValue(); \ 16 | TYPE::ValType b = TYPE::randValue(); \ 17 | TYPE::ValType c = TYPE::safe_add(a, b); \ 18 | TYPE::ValType d = TYPE::safe_times(a, b); \ 19 | /* Test consistency */ \ 20 | ASSERT_EQ(a, TYPE::normalize(a)); \ 21 | ASSERT_EQ(c, TYPE::safe_add(a,b)); \ 22 | ASSERT_EQ(d, TYPE::safe_times(a,b)); \ 23 | /* Test properties */ \ 24 | ASSERT_EQ(TYPE::safe_add(a, TYPE::zero()), a); \ 25 | ASSERT_EQ(TYPE::safe_times(a, TYPE::one()), a); \ 26 | ASSERT_EQ(TYPE::safe_times(a, TYPE::zero()), TYPE::zero()); \ 27 | } while(0) 28 | #endif 29 | 30 | /* 31 | TEST(Decode, TestHypergraph) { 32 | Hypergraph test; 33 | vector nodes; 34 | nodes.push_back(test.add_terminal_node("one")); 35 | nodes.push_back(test.add_terminal_node("two")); 36 | nodes.push_back(test.add_terminal_node("three")); 37 | 38 | test.start_node("root"); 39 | test.add_edge(nodes, "Edgy"); 40 | test.end_node(); 41 | 42 | test.finish(); 43 | ASSERT_EQ(test.nodes().size(), 4); 44 | ASSERT_EQ(test.edges().size(), 1); 45 | } 46 | 47 | 
TEST(Decode, SemiringTests) { 48 | srand(time(NULL)); 49 | typedef CompPotential CVL; 50 | for(uint i = 0; i < NUM_LOOPS; ++i) { 51 | SEMIRINGTEST(ViterbiPotential); 52 | SEMIRINGTEST(LogViterbiPotential); 53 | SEMIRINGTEST(InsidePotential); 54 | SEMIRINGTEST(RealPotential); 55 | SEMIRINGTEST(TropicalPotential); 56 | SEMIRINGTEST(BoolPotential); 57 | SEMIRINGTEST(CountingPotential); 58 | SEMIRINGTEST(CVL); 59 | // SEMIRINGTEST(SparseVectorPotential); 60 | // SEMIRINGTEST(TreePotential); 61 | } 62 | } 63 | */ 64 | 65 | TEST(Decode, BinarySemiringTests) { 66 | srand(time(NULL)); 67 | typedef CompPotential CVL; 68 | for(uint i = 0; i < NUM_LOOPS; ++i) { 69 | BinaryVectorPotential::ValType a = BinaryVectorPotential::randValue(); 70 | ASSERT_EQ(BinaryVectorPotential::times(a, BinaryVectorPotential::one()), a); 71 | ASSERT_EQ(BinaryVectorPotential::times(a, BinaryVectorPotential::zero()), BinaryVectorPotential::zero()); 72 | ASSERT_EQ(BinaryVectorPotential::add(a, BinaryVectorPotential::zero()), a); 73 | } 74 | 75 | BinaryVectorPotential::ValType a = BinaryVectorPotential::ValType(0xfa); 76 | BinaryVectorPotential::ValType b = BinaryVectorPotential::ValType(0x05); 77 | BinaryVectorPotential::ValType c = BinaryVectorPotential::ValType(0x15); 78 | ASSERT_EQ(BinaryVectorPotential::ValType(0xff), BinaryVectorPotential::times(a, b)); 79 | ASSERT_EQ(BinaryVectorPotential::ValType(0xff), BinaryVectorPotential::times(a, b)); 80 | ASSERT_TRUE(BinaryVectorPotential::valid(a, b)); 81 | ASSERT_FALSE(BinaryVectorPotential::valid(a, c)); 82 | } 83 | 84 | int main(int argc, char** argv) { 85 | ::testing::InitGoogleTest(&argc, argv); 86 | return RUN_ALL_TESTS(); 87 | } 88 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #include 5 | #include 6 | #include "./foreach.h" 7 | 8 | #define INF 1e9 9 | #define 
foreach BOOST_FOREACH 10 | 11 | typedef unsigned int uint; 12 | class Clock { 13 | public: 14 | static double diffclock(clock_t clock1,clock_t clock2){ 15 | double diffticks=clock1-clock2; 16 | double diffms=(diffticks*1000)/CLOCKS_PER_SEC; 17 | return diffms; 18 | } 19 | }; 20 | 21 | inline double dRand(double dMin, double dMax) { 22 | double d = (double)rand() / RAND_MAX; 23 | return dMin + d * (dMax - dMin); 24 | } 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/foreach.h: -------------------------------------------------------------------------------- 1 | #ifndef BOOST_FOREACH 2 | /////////////////////////////////////////////////////////////////////////////// 3 | // A stripped down version of FOREACH for 4 | // illustration purposes. NOT FOR GENERAL USE. 5 | // For a complete implementation, see BOOST_FOREACH at 6 | // http://boost-sandbox.sourceforge.net/vault/index.php?directory=eric_niebler 7 | // 8 | // Copyright 2004 Eric Niebler. 9 | // Distributed under the Boost Software License, Version 1.0. (See 10 | // accompanying file LICENSE_1_0.txt or copy at 11 | // http://www.boost.org/LICENSE_1_0.txt) 12 | // 13 | // Adapted to Assimp November 29th, 2008 (Alexander Gessler). 14 | // Added code to handle both const and non-const iterators, simplified some 15 | // parts. 
16 | /////////////////////////////////////////////////////////////////////////////// 17 | namespace boost { 18 | namespace foreach_detail { 19 | /////////////////////////////////////////////////////////////////////////////// 20 | // auto_any 21 | struct auto_any_base 22 | { 23 | operator bool() const { return false; } 24 | }; 25 | template 26 | struct auto_any : auto_any_base 27 | { 28 | auto_any(T const& t) : item(t) {} 29 | mutable T item; 30 | }; 31 | template 32 | T& auto_any_cast(auto_any_base const& any) 33 | { 34 | return static_cast const&>(any).item; 35 | } 36 | /////////////////////////////////////////////////////////////////////////////// 37 | // FOREACH helper function 38 | template 39 | auto_any begin(T const& t) 40 | { 41 | return t.begin(); 42 | } 43 | template 44 | auto_any end(T const& t) 45 | { 46 | return t.end(); 47 | } 48 | // iterator 49 | template 50 | bool done(auto_any_base const& cur, auto_any_base const& end, T&) 51 | { 52 | typedef typename T::iterator iter_type; 53 | return auto_any_cast(cur) == auto_any_cast(end); 54 | } 55 | template 56 | void next(auto_any_base const& cur, T&) 57 | { 58 | ++auto_any_cast(cur); 59 | } 60 | template 61 | typename T::reference deref(auto_any_base const& cur, T&) 62 | { 63 | return *auto_any_cast(cur); 64 | } 65 | } // end foreach_detail 66 | /////////////////////////////////////////////////////////////////////////////// 67 | // FOREACH 68 | #define BOOST_FOREACH(item, container) \ 69 | if (boost::foreach_detail::auto_any_base const& b = boost::foreach_detail::begin(container)) {} else \ 70 | if (boost::foreach_detail::auto_any_base const& e = boost::foreach_detail::end(container)) {} else \ 71 | for (;!boost::foreach_detail::done(b,e,container); boost::foreach_detail::next(b,container)) \ 72 | if (bool ugly_and_unique_break = false) {} else \ 73 | for (item = boost::foreach_detail::deref(b,container); !ugly_and_unique_break; ugly_and_unique_break = true) 74 | } // end boost 75 | #endif 76 | 
--------------------------------------------------------------------------------