├── .gitignore ├── LICENSE ├── README.md ├── configs ├── data_name_mapping.json ├── feature_groups │ ├── char_col.tsv │ ├── par_col.tsv │ ├── rest_col.tsv │ ├── topic_col.tsv │ └── word_col.tsv └── types.json ├── demo ├── predict.py ├── pretrained_sato │ ├── model.pt │ └── sherlock_None.pt ├── screenshots │ ├── 1.png │ └── 2.png ├── server.py ├── static │ ├── bootstrap │ │ ├── css │ │ │ ├── bootstrap-social.css │ │ │ ├── bootstrap.css │ │ │ ├── bootstrap.css.map │ │ │ ├── bootstrap.min.css │ │ │ └── bootstrap.min.css.map │ │ └── js │ │ │ ├── bootstrap.bundle.js │ │ │ ├── bootstrap.bundle.js.map │ │ │ ├── bootstrap.bundle.min.js │ │ │ ├── bootstrap.bundle.min.js.map │ │ │ ├── bootstrap.js │ │ │ ├── bootstrap.js.map │ │ │ ├── bootstrap.min.js │ │ │ └── bootstrap.min.js.map │ └── jquery │ │ ├── jquery.js │ │ ├── jquery.min.js │ │ ├── jquery.min.map │ │ ├── jquery.slim.js │ │ ├── jquery.slim.min.js │ │ └── jquery.slim.min.map ├── templates │ ├── error.html │ ├── table.html │ ├── test.html │ └── upload.html └── uploads │ ├── University_employees.csv │ ├── files.csv │ └── theaters.csv ├── diagram-overview.svg ├── download_data.sh ├── extract ├── extract_features.py ├── extract_header.py ├── extract_matrix.py ├── extract_table.py ├── feature_extraction │ ├── __init__.py │ ├── sherlock_features.py │ └── topic_features_LDA.py ├── helpers │ ├── dateparser.py │ ├── general_helpers.py │ ├── read_raw_data.py │ ├── type_detection.py │ └── utils.py ├── out │ └── train_test_split │ │ ├── webtables1-p1_type78.json │ │ ├── webtables1-p1_type78_multi-col.json │ │ ├── webtables2-p1_type78.json │ │ └── webtables2-p1_type78_multi-col.json ├── raw_col_label_pairs.py └── split_train_test.py ├── model ├── co_occur_matrix │ └── matrix_co_W0_type78.npy ├── datasets.py ├── factor.py ├── models_sherlock.py ├── params │ ├── crf_configs │ │ ├── CRF+LDA.txt │ │ └── CRF.txt │ ├── sherlock+LDA.txt │ └── sherlock.txt ├── pre_trained_CRF │ └── type78 │ │ ├── CRF+LDA_pre.pt │ │ └── CRF_pre.pt ├── pre_trained_sherlock │ └── type78 │ │ ├── all_None.pt │ │ └── sherlock_None.pt ├── torchcrf │ └── __init__.py ├── train_CRF_LC.py └── train_sherlock.py ├── notebooks ├── FinalPlots-paper.ipynb └── plot_data │ ├── feature_importance_CRF_None.csv │ ├── feature_importance_CRF_num-directstr_thr-0_tn-400.csv │ ├── feature_importance_single_None.csv │ ├── feature_importance_single_num-directstr_thr-0_tn-400.csv │ ├── log_co-occur.csv │ ├── result_CRF_LDA_multi-col.csv │ ├── result_CRF_multi-col.csv │ ├── result_LDA_multi-col.csv │ ├── result_sherlock_multi-col.csv │ └── type-dist.csv ├── requirements.txt ├── scripts ├── analysis_functions.py ├── exp.sh ├── feature_importance.py └── per_type.py ├── sherlock └── features │ ├── bag_of_characters.py │ ├── bag_of_words.py │ ├── paragraph_vectors.py │ └── word_embeddings.py ├── table_data ├── README.md └── download.sh ├── topic_model ├── LDA_cluster.py └── train_LDA.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.pyc 3 | results/ 4 | sherlock/pretrained/ 5 | tmp/ 6 | topic_model/LDA_cache/ 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/README.md -------------------------------------------------------------------------------- /configs/data_name_mapping.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/configs/data_name_mapping.json -------------------------------------------------------------------------------- /configs/feature_groups/char_col.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/configs/feature_groups/char_col.tsv -------------------------------------------------------------------------------- /configs/feature_groups/par_col.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/configs/feature_groups/par_col.tsv -------------------------------------------------------------------------------- /configs/feature_groups/rest_col.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/configs/feature_groups/rest_col.tsv -------------------------------------------------------------------------------- /configs/feature_groups/topic_col.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/configs/feature_groups/topic_col.tsv -------------------------------------------------------------------------------- /configs/feature_groups/word_col.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/configs/feature_groups/word_col.tsv -------------------------------------------------------------------------------- /configs/types.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/configs/types.json -------------------------------------------------------------------------------- /demo/predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/predict.py -------------------------------------------------------------------------------- /demo/pretrained_sato/model.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/pretrained_sato/model.pt -------------------------------------------------------------------------------- /demo/pretrained_sato/sherlock_None.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/pretrained_sato/sherlock_None.pt -------------------------------------------------------------------------------- /demo/screenshots/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/screenshots/1.png -------------------------------------------------------------------------------- /demo/screenshots/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/screenshots/2.png -------------------------------------------------------------------------------- /demo/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/server.py -------------------------------------------------------------------------------- /demo/static/bootstrap/css/bootstrap-social.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/css/bootstrap-social.css -------------------------------------------------------------------------------- /demo/static/bootstrap/css/bootstrap.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/css/bootstrap.css -------------------------------------------------------------------------------- /demo/static/bootstrap/css/bootstrap.css.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/css/bootstrap.css.map -------------------------------------------------------------------------------- /demo/static/bootstrap/css/bootstrap.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/css/bootstrap.min.css -------------------------------------------------------------------------------- /demo/static/bootstrap/css/bootstrap.min.css.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/css/bootstrap.min.css.map -------------------------------------------------------------------------------- /demo/static/bootstrap/js/bootstrap.bundle.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/js/bootstrap.bundle.js -------------------------------------------------------------------------------- /demo/static/bootstrap/js/bootstrap.bundle.js.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/js/bootstrap.bundle.js.map -------------------------------------------------------------------------------- /demo/static/bootstrap/js/bootstrap.bundle.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/js/bootstrap.bundle.min.js -------------------------------------------------------------------------------- /demo/static/bootstrap/js/bootstrap.bundle.min.js.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/js/bootstrap.bundle.min.js.map -------------------------------------------------------------------------------- /demo/static/bootstrap/js/bootstrap.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/js/bootstrap.js -------------------------------------------------------------------------------- /demo/static/bootstrap/js/bootstrap.js.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/js/bootstrap.js.map -------------------------------------------------------------------------------- /demo/static/bootstrap/js/bootstrap.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/js/bootstrap.min.js -------------------------------------------------------------------------------- /demo/static/bootstrap/js/bootstrap.min.js.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/bootstrap/js/bootstrap.min.js.map -------------------------------------------------------------------------------- /demo/static/jquery/jquery.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/jquery/jquery.js -------------------------------------------------------------------------------- /demo/static/jquery/jquery.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/jquery/jquery.min.js -------------------------------------------------------------------------------- /demo/static/jquery/jquery.min.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/jquery/jquery.min.map -------------------------------------------------------------------------------- /demo/static/jquery/jquery.slim.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/jquery/jquery.slim.js -------------------------------------------------------------------------------- /demo/static/jquery/jquery.slim.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/jquery/jquery.slim.min.js -------------------------------------------------------------------------------- /demo/static/jquery/jquery.slim.min.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/static/jquery/jquery.slim.min.map -------------------------------------------------------------------------------- /demo/templates/error.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/templates/error.html -------------------------------------------------------------------------------- /demo/templates/table.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/templates/table.html -------------------------------------------------------------------------------- /demo/templates/test.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/templates/test.html -------------------------------------------------------------------------------- /demo/templates/upload.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/templates/upload.html -------------------------------------------------------------------------------- /demo/uploads/University_employees.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/uploads/University_employees.csv -------------------------------------------------------------------------------- /demo/uploads/files.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/uploads/files.csv -------------------------------------------------------------------------------- /demo/uploads/theaters.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/demo/uploads/theaters.csv -------------------------------------------------------------------------------- /diagram-overview.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/diagram-overview.svg -------------------------------------------------------------------------------- /download_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/download_data.sh -------------------------------------------------------------------------------- /extract/extract_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/extract_features.py -------------------------------------------------------------------------------- /extract/extract_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/extract_header.py -------------------------------------------------------------------------------- /extract/extract_matrix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/extract_matrix.py -------------------------------------------------------------------------------- /extract/extract_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/extract_table.py -------------------------------------------------------------------------------- /extract/feature_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extract/feature_extraction/sherlock_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/feature_extraction/sherlock_features.py -------------------------------------------------------------------------------- /extract/feature_extraction/topic_features_LDA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/feature_extraction/topic_features_LDA.py -------------------------------------------------------------------------------- /extract/helpers/dateparser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/helpers/dateparser.py -------------------------------------------------------------------------------- /extract/helpers/general_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/helpers/general_helpers.py -------------------------------------------------------------------------------- /extract/helpers/read_raw_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/helpers/read_raw_data.py -------------------------------------------------------------------------------- /extract/helpers/type_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/helpers/type_detection.py -------------------------------------------------------------------------------- /extract/helpers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/helpers/utils.py -------------------------------------------------------------------------------- /extract/out/train_test_split/webtables1-p1_type78.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/out/train_test_split/webtables1-p1_type78.json -------------------------------------------------------------------------------- /extract/out/train_test_split/webtables1-p1_type78_multi-col.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/out/train_test_split/webtables1-p1_type78_multi-col.json -------------------------------------------------------------------------------- /extract/out/train_test_split/webtables2-p1_type78.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/out/train_test_split/webtables2-p1_type78.json -------------------------------------------------------------------------------- /extract/out/train_test_split/webtables2-p1_type78_multi-col.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/out/train_test_split/webtables2-p1_type78_multi-col.json -------------------------------------------------------------------------------- /extract/raw_col_label_pairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/raw_col_label_pairs.py -------------------------------------------------------------------------------- /extract/split_train_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/extract/split_train_test.py -------------------------------------------------------------------------------- /model/co_occur_matrix/matrix_co_W0_type78.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/co_occur_matrix/matrix_co_W0_type78.npy -------------------------------------------------------------------------------- /model/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/datasets.py -------------------------------------------------------------------------------- /model/factor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/factor.py -------------------------------------------------------------------------------- /model/models_sherlock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/models_sherlock.py -------------------------------------------------------------------------------- /model/params/crf_configs/CRF+LDA.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/params/crf_configs/CRF+LDA.txt -------------------------------------------------------------------------------- /model/params/crf_configs/CRF.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/params/crf_configs/CRF.txt -------------------------------------------------------------------------------- /model/params/sherlock+LDA.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/params/sherlock+LDA.txt -------------------------------------------------------------------------------- /model/params/sherlock.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/params/sherlock.txt -------------------------------------------------------------------------------- /model/pre_trained_CRF/type78/CRF+LDA_pre.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/pre_trained_CRF/type78/CRF+LDA_pre.pt -------------------------------------------------------------------------------- /model/pre_trained_CRF/type78/CRF_pre.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/pre_trained_CRF/type78/CRF_pre.pt -------------------------------------------------------------------------------- /model/pre_trained_sherlock/type78/all_None.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/pre_trained_sherlock/type78/all_None.pt -------------------------------------------------------------------------------- /model/pre_trained_sherlock/type78/sherlock_None.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/pre_trained_sherlock/type78/sherlock_None.pt -------------------------------------------------------------------------------- /model/torchcrf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/torchcrf/__init__.py -------------------------------------------------------------------------------- /model/train_CRF_LC.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/train_CRF_LC.py -------------------------------------------------------------------------------- /model/train_sherlock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/model/train_sherlock.py -------------------------------------------------------------------------------- /notebooks/FinalPlots-paper.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/FinalPlots-paper.ipynb -------------------------------------------------------------------------------- /notebooks/plot_data/feature_importance_CRF_None.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/feature_importance_CRF_None.csv -------------------------------------------------------------------------------- /notebooks/plot_data/feature_importance_CRF_num-directstr_thr-0_tn-400.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/feature_importance_CRF_num-directstr_thr-0_tn-400.csv -------------------------------------------------------------------------------- /notebooks/plot_data/feature_importance_single_None.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/feature_importance_single_None.csv -------------------------------------------------------------------------------- /notebooks/plot_data/feature_importance_single_num-directstr_thr-0_tn-400.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/feature_importance_single_num-directstr_thr-0_tn-400.csv -------------------------------------------------------------------------------- /notebooks/plot_data/log_co-occur.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/log_co-occur.csv -------------------------------------------------------------------------------- /notebooks/plot_data/result_CRF_LDA_multi-col.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/result_CRF_LDA_multi-col.csv -------------------------------------------------------------------------------- /notebooks/plot_data/result_CRF_multi-col.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/result_CRF_multi-col.csv -------------------------------------------------------------------------------- /notebooks/plot_data/result_LDA_multi-col.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/result_LDA_multi-col.csv -------------------------------------------------------------------------------- /notebooks/plot_data/result_sherlock_multi-col.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/result_sherlock_multi-col.csv -------------------------------------------------------------------------------- /notebooks/plot_data/type-dist.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/notebooks/plot_data/type-dist.csv -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/analysis_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/scripts/analysis_functions.py -------------------------------------------------------------------------------- /scripts/exp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/scripts/exp.sh -------------------------------------------------------------------------------- /scripts/feature_importance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/scripts/feature_importance.py -------------------------------------------------------------------------------- /scripts/per_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/scripts/per_type.py -------------------------------------------------------------------------------- /sherlock/features/bag_of_characters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/sherlock/features/bag_of_characters.py -------------------------------------------------------------------------------- /sherlock/features/bag_of_words.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/sherlock/features/bag_of_words.py -------------------------------------------------------------------------------- /sherlock/features/paragraph_vectors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/sherlock/features/paragraph_vectors.py -------------------------------------------------------------------------------- /sherlock/features/word_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/sherlock/features/word_embeddings.py -------------------------------------------------------------------------------- /table_data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/table_data/README.md -------------------------------------------------------------------------------- /table_data/download.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/table_data/download.sh -------------------------------------------------------------------------------- /topic_model/LDA_cluster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/topic_model/LDA_cluster.py -------------------------------------------------------------------------------- /topic_model/train_LDA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/topic_model/train_LDA.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/sato/HEAD/utils.py --------------------------------------------------------------------------------