├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── barplot_evaluation.png ├── datasets ├── SNAP.Hashtags.Segmented.w.Heuristics.txt ├── Test-Stanford.txt ├── binkley.csv ├── bt11.csv ├── jhotdraw.txt ├── loyola-udelaware-identifier-splitting-oracle.txt ├── lynx.txt ├── stan_large_dev.csv ├── stan_large_test.csv ├── stan_large_train.csv └── stan_small.csv ├── hashformers.ipynb ├── hashformers.png ├── requirements.txt ├── scripts └── evaluate_ekphrasis.py ├── setup.cfg ├── setup.py ├── src └── hashformers │ ├── __init__.py │ ├── beamsearch │ ├── __init__.py │ ├── algorithm.py │ ├── bert_lm.py │ ├── data_structures.py │ ├── gpt2_lm.py │ ├── minicons_lm.py │ ├── model_lm.py │ └── reranker.py │ ├── ensemble │ ├── __init__.py │ └── top2_fusion.py │ ├── evaluation │ ├── __init__.py │ ├── modeler.py │ └── utils.py │ ├── experiments │ ├── __init__.py │ ├── evaluation.py │ └── utils.py │ └── segmenter │ ├── __init__.py │ ├── auto.py │ ├── base_segmenter.py │ ├── data_structures.py │ ├── regex_segmenter.py │ └── segmenter.py ├── tests └── test_segmenter.py └── tutorials └── EVALUATION.md /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/README.md -------------------------------------------------------------------------------- /barplot_evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/barplot_evaluation.png -------------------------------------------------------------------------------- /datasets/SNAP.Hashtags.Segmented.w.Heuristics.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/SNAP.Hashtags.Segmented.w.Heuristics.txt -------------------------------------------------------------------------------- /datasets/Test-Stanford.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/Test-Stanford.txt -------------------------------------------------------------------------------- /datasets/binkley.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/binkley.csv -------------------------------------------------------------------------------- /datasets/bt11.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/bt11.csv -------------------------------------------------------------------------------- /datasets/jhotdraw.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/jhotdraw.txt -------------------------------------------------------------------------------- /datasets/loyola-udelaware-identifier-splitting-oracle.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/loyola-udelaware-identifier-splitting-oracle.txt -------------------------------------------------------------------------------- /datasets/lynx.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/lynx.txt -------------------------------------------------------------------------------- /datasets/stan_large_dev.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/stan_large_dev.csv -------------------------------------------------------------------------------- /datasets/stan_large_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/stan_large_test.csv -------------------------------------------------------------------------------- /datasets/stan_large_train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/stan_large_train.csv -------------------------------------------------------------------------------- /datasets/stan_small.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/datasets/stan_small.csv -------------------------------------------------------------------------------- /hashformers.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/hashformers.ipynb -------------------------------------------------------------------------------- /hashformers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/hashformers.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | minicons 2 | twitter-text-python 3 | ekphrasis 4 | pandas -------------------------------------------------------------------------------- /scripts/evaluate_ekphrasis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/scripts/evaluate_ekphrasis.py -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description_file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/setup.py -------------------------------------------------------------------------------- /src/hashformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/__init__.py -------------------------------------------------------------------------------- /src/hashformers/beamsearch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/hashformers/beamsearch/algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/beamsearch/algorithm.py -------------------------------------------------------------------------------- /src/hashformers/beamsearch/bert_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/beamsearch/bert_lm.py -------------------------------------------------------------------------------- /src/hashformers/beamsearch/data_structures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/beamsearch/data_structures.py -------------------------------------------------------------------------------- /src/hashformers/beamsearch/gpt2_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/beamsearch/gpt2_lm.py -------------------------------------------------------------------------------- /src/hashformers/beamsearch/minicons_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/beamsearch/minicons_lm.py -------------------------------------------------------------------------------- /src/hashformers/beamsearch/model_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/beamsearch/model_lm.py -------------------------------------------------------------------------------- /src/hashformers/beamsearch/reranker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/beamsearch/reranker.py -------------------------------------------------------------------------------- /src/hashformers/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/hashformers/ensemble/top2_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/ensemble/top2_fusion.py -------------------------------------------------------------------------------- /src/hashformers/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeler import * -------------------------------------------------------------------------------- /src/hashformers/evaluation/modeler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/evaluation/modeler.py -------------------------------------------------------------------------------- /src/hashformers/evaluation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/evaluation/utils.py -------------------------------------------------------------------------------- /src/hashformers/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/hashformers/experiments/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/experiments/evaluation.py -------------------------------------------------------------------------------- /src/hashformers/experiments/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/experiments/utils.py -------------------------------------------------------------------------------- /src/hashformers/segmenter/__init__.py: -------------------------------------------------------------------------------- 1 | from .segmenter import * -------------------------------------------------------------------------------- /src/hashformers/segmenter/auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/segmenter/auto.py -------------------------------------------------------------------------------- /src/hashformers/segmenter/base_segmenter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/segmenter/base_segmenter.py -------------------------------------------------------------------------------- /src/hashformers/segmenter/data_structures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/segmenter/data_structures.py -------------------------------------------------------------------------------- /src/hashformers/segmenter/regex_segmenter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/segmenter/regex_segmenter.py -------------------------------------------------------------------------------- /src/hashformers/segmenter/segmenter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/src/hashformers/segmenter/segmenter.py -------------------------------------------------------------------------------- /tests/test_segmenter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/tests/test_segmenter.py -------------------------------------------------------------------------------- /tutorials/EVALUATION.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruanchaves/hashformers/HEAD/tutorials/EVALUATION.md --------------------------------------------------------------------------------