├── .github
    └── dependabot.yml
├── .gitignore
├── LICENSE
├── README.md
├── api
    ├── Dockerfile
    ├── Pipfile
    ├── Pipfile.lock
    ├── app.py
    ├── boot.sh
    ├── model_setup.py
    ├── plugin_watcher.py
    └── utils
    │   ├── abort.py
    │   ├── aio.py
    │   ├── article_download.py
    │   ├── cancel.py
    │   ├── pdf.py
    │   ├── request.py
    │   ├── semantic.py
    │   └── sentence.py
├── cli
    ├── __init__.py
    ├── config.py
    ├── docker_interface.py
    ├── exceptions.py
    ├── git_interface.py
    ├── plugins.py
    ├── schema.py
    └── utils.py
├── configure.py
├── debug_plugin
    ├── Pipfile
    ├── Pipfile.lock
    ├── metric.py
    ├── model_setup.py
    ├── summarizer.py
    └── sw-plugin-config.yaml
├── demo-files
    ├── example.jsonl
    ├── generated.txt
    ├── references.txt
    └── viewer-example.json
├── docker
    └── Dockerfile.plugin
├── docs
    ├── .gitignore
    ├── README.md
    ├── babel.config.js
    ├── docs
    │   ├── api-documentation.md
    │   ├── configuration.md
    │   ├── deployment.md
    │   ├── setup_quickstart.md
    │   ├── usage.md
    │   └── writing-a-plugin.md
    ├── docusaurus.config.js
    ├── package-lock.json
    ├── package.json
    ├── sidebars.js
    ├── src
    │   ├── css
    │   │   └── custom.css
    │   └── pages
    │   │   └── index.module.css
    └── static
    │   ├── .nojekyll
    │   ├── evaluation_input.gif
    │   ├── evaluation_plotter.gif
    │   ├── evaluation_scores.gif
    │   ├── evaluation_visualization.gif
    │   ├── img
    │       └── favicon.png
    │   ├── summarize_input.gif
    │   ├── summarize_pdf_extract.gif
    │   └── summarize_usage.gif
├── frontend
    ├── .dockerignore
    ├── .eslintrc.yml
    ├── .prettierrc.json
    ├── Dockerfile
    ├── Dockerfile.dev
    ├── nginx.conf
    ├── package-lock.json
    ├── package.json
    ├── postcss.config.js
    ├── public
    │   ├── index.html
    │   └── static
    │   │   └── example.jsonl
    ├── src
    │   ├── App.js
    │   ├── api.js
    │   ├── components
    │   │   ├── About.js
    │   │   ├── CompareTable.js
    │   │   ├── Evaluate.js
    │   │   ├── Model.js
    │   │   ├── OneHypRef.js
    │   │   ├── Result.js
    │   │   ├── Saved.js
    │   │   ├── ScoreWorkbench.js
    │   │   ├── Settings.js
    │   │   ├── Summarize.js
    │   │   ├── Upload.js
    │   │   └── utils
    │   │   │   ├── Arguments.js
    │   │   │   ├── Badge.js
    │   │   │   ├── Button.js
    │   │   │   ├── Card.js
    │   │   │   ├── ChooseFile.js
    │   │   │   ├── Container.js
    │   │   │   ├── Disclosure.js
    │   │   │   ├── Error.js
    │   │   │   ├── Form.js
    │   │   │   ├── FuzzySearch.js
    │   │   │   ├── Icons.js
    │   │   │   ├── Layout.js
    │   │   │   ├── Loading.js
    │   │   │   ├── Markup.js
    │   │   │   ├── Modal.js
    │   │   │   ├── Pagination.js
    │   │   │   ├── Radio.js
    │   │   │   ├── Range.js
    │   │   │   ├── Spinner.js
    │   │   │   ├── Table.js
    │   │   │   ├── Tabs.js
    │   │   │   ├── Text.js
    │   │   │   ├── Toggle.js
    │   │   │   └── Tooltip.js
    │   ├── config.js
    │   ├── contexts
    │   │   ├── DragContext.js
    │   │   ├── HoverContext.js
    │   │   ├── MetricsContext.js
    │   │   ├── SettingsContext.js
    │   │   └── SummarizersContext.js
    │   ├── css
    │   │   └── App.css
    │   ├── hooks
    │   │   ├── abortController.js
    │   │   ├── calculations.js
    │   │   ├── list.js
    │   │   ├── markup.js
    │   │   └── plugins.js
    │   ├── index.css
    │   ├── index.js
    │   ├── request.js
    │   └── utils
    │   │   ├── color.js
    │   │   ├── common.js
    │   │   ├── export.js
    │   │   ├── flatScores.js
    │   │   ├── markup.js
    │   │   ├── python.js
    │   │   ├── readFile.js
    │   │   └── saved.js
    └── tailwind.config.js
├── grobid
    ├── Dockerfile
    └── config.yaml
├── jsonl_converter.py
├── metrics
    ├── bartscore
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── bart_score.py
    │   ├── metric.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── bertscore
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── metric.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── bleu
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── metric
    │   │   ├── __init__.py
    │   │   └── bleu.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── bleurt
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── metric.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── cider
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── license.txt
    │   ├── metric
    │   │   ├── __init__.py
    │   │   ├── cider.py
    │   │   └── cider_scorer.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── greedy_matching
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── metric.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── meteor
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── license.txt
    │   ├── metric
    │   │   ├── __init__.py
    │   │   ├── data
    │   │   │   └── paraphrase-en.gz
    │   │   ├── meteor-1.5.jar
    │   │   └── meteor.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── moverscore
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── metric.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── rouge
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── metric.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    ├── sbert
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── metric.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
    └── spacy_similarity
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── metric.py
    │   ├── model_setup.py
    │   └── sw-plugin-config.yaml
├── other
    ├── interesting_findings.md
    └── static
    │   ├── BLEURT_BERTScore.png
    │   ├── MoverScore_BERTScore.png
    │   ├── SBERT_BERTScore.png
    │   ├── T5-11B_T5-3B.png
    │   ├── example_low-BARTScore_medium-BERTScore.png
    │   ├── example_medium-BARTScore_low-BERTScore.png
    │   ├── select_BLEURT_BARTScore.png
    │   ├── select_BLEU_BERTScore.png
    │   ├── semantic_lexical_variance.png
    │   └── spacy_similarity_BERTScore.png
├── plugin_config
    └── .gitkeep
├── plugin_server
    ├── app.py
    ├── application.py
    ├── argument_models.py
    ├── dev.boot.sh
    ├── errors.py
    ├── manager
    │   ├── request.py
    │   └── websocket.py
    ├── metric_factory.py
    ├── requirements.txt
    ├── summarizer_factory.py
    ├── utils
    │   ├── aio.py
    │   ├── cache.py
    │   ├── event.py
    │   ├── pipe.py
    │   └── thread.py
    └── workers.py
├── requirements.txt
├── schema
    └── .gitignore
├── summarizers
    ├── aosumm
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   ├── __init__.py
    │   │   ├── summarizer.py
    │   │   └── transformer_summarizer.py
    │   └── sw-plugin-config.yaml
    ├── argpagerank
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── argument.py
    │   ├── model_setup.py
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
    ├── bertsum
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   ├── BertParent.py
    │   │   ├── ClusterFeatures.py
    │   │   ├── ModelProcessor.py
    │   │   └── __init__.py
    │   └── sw-plugin-config.yaml
    ├── biasedtextrank
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
    ├── brio
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   ├── __init__.py
    │   │   ├── summarizer.py
    │   │   └── transformer_summarizer.py
    │   └── sw-plugin-config.yaml
    ├── cliffsum
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   ├── __init__.py
    │   │   ├── download.py
    │   │   ├── summarizer.py
    │   │   └── transformer_summarizer.py
    │   └── sw-plugin-config.yaml
    ├── conclugen
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   ├── __init__.py
    │   │   ├── download.py
    │   │   ├── summarizer.py
    │   │   └── transformer_summarizer.py
    │   └── sw-plugin-config.yaml
    ├── coopsum
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   ├── __init__.py
    │   │   └── coop
    │   │   │   ├── config
    │   │   │       ├── bimeanvae
    │   │   │       │   ├── amzn.jsonnet
    │   │   │       │   └── yelp.jsonnet
    │   │   │       ├── optimus
    │   │   │       │   ├── amzn.jsonnet
    │   │   │       │   └── yelp.jsonnet
    │   │   │       └── utils.libsonnet
    │   │   │   ├── coop
    │   │   │       ├── __init__.py
    │   │   │       ├── models
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── base.py
    │   │   │       │   ├── bimeanvae.py
    │   │   │       │   ├── optimus.py
    │   │   │       │   └── util.py
    │   │   │       ├── reader.py
    │   │   │       ├── search.py
    │   │   │       ├── tokenizer.py
    │   │   │       ├── util.py
    │   │   │       └── vae.py
    │   │   │   └── scripts
    │   │   │       ├── get_summ.py
    │   │   │       ├── preprocess.py
    │   │   │       └── spm_train.py
    │   └── sw-plugin-config.yaml
    ├── featuresum
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   ├── __init__.py
    │   │   └── scores
    │   │   │   ├── __init__.py
    │   │   │   ├── average_lexical_connectivity.py
    │   │   │   ├── content_words_ratio.py
    │   │   │   ├── length.py
    │   │   │   ├── position.py
    │   │   │   ├── rank.py
    │   │   │   ├── special_tokens.py
    │   │   │   ├── tfidf.py
    │   │   │   ├── util.py
    │   │   │   └── word_overlap.py
    │   └── sw-plugin-config.yaml
    ├── gsum
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── guided_summarization
    │   │   └── bart
    │   │   │   ├── LICENSE
    │   │   │   ├── README.md
    │   │   │   ├── fairseq
    │   │   │       ├── __init__.py
    │   │   │       ├── benchmark
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── dummy_lm.py
    │   │   │       │   ├── dummy_masked_lm.py
    │   │   │       │   └── dummy_model.py
    │   │   │       ├── binarizer.py
    │   │   │       ├── bleu.py
    │   │   │       ├── checkpoint_utils.py
    │   │   │       ├── clib
    │   │   │       │   ├── libbleu
    │   │   │       │   │   ├── libbleu.cpp
    │   │   │       │   │   └── module.cpp
    │   │   │       │   ├── libnat
    │   │   │       │   │   └── edit_dist.cpp
    │   │   │       │   └── libnat_cuda
    │   │   │       │   │   ├── binding.cpp
    │   │   │       │   │   ├── edit_dist.cu
    │   │   │       │   │   └── edit_dist.h
    │   │   │       ├── criterions
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── adaptive_loss.py
    │   │   │       │   ├── binary_cross_entropy.py
    │   │   │       │   ├── composite_loss.py
    │   │   │       │   ├── cross_entropy.py
    │   │   │       │   ├── fairseq_criterion.py
    │   │   │       │   ├── guided_label_smoothed_cross_entropy.py
    │   │   │       │   ├── label_smoothed_cross_entropy.py
    │   │   │       │   ├── label_smoothed_cross_entropy_with_alignment.py
    │   │   │       │   ├── legacy_masked_lm.py
    │   │   │       │   ├── masked_lm.py
    │   │   │       │   ├── nat_loss.py
    │   │   │       │   ├── sentence_prediction.py
    │   │   │       │   └── sentence_ranking.py
    │   │   │       ├── data
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── append_token_dataset.py
    │   │   │       │   ├── audio
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   └── raw_audio_dataset.py
    │   │   │       │   ├── backtranslation_dataset.py
    │   │   │       │   ├── base_wrapper_dataset.py
    │   │   │       │   ├── colorize_dataset.py
    │   │   │       │   ├── concat_dataset.py
    │   │   │       │   ├── concat_sentences_dataset.py
    │   │   │       │   ├── data_utils.py
    │   │   │       │   ├── data_utils_fast.cpp
    │   │   │       │   ├── data_utils_fast.pyx
    │   │   │       │   ├── denoising_dataset.py
    │   │   │       │   ├── dictionary.py
    │   │   │       │   ├── encoders
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── byte_bpe.py
    │   │   │       │   │   ├── byte_utils.py
    │   │   │       │   │   ├── bytes.py
    │   │   │       │   │   ├── characters.py
    │   │   │       │   │   ├── fastbpe.py
    │   │   │       │   │   ├── gpt2_bpe.py
    │   │   │       │   │   ├── gpt2_bpe_utils.py
    │   │   │       │   │   ├── hf_bert_bpe.py
    │   │   │       │   │   ├── hf_byte_bpe.py
    │   │   │       │   │   ├── moses_tokenizer.py
    │   │   │       │   │   ├── nltk_tokenizer.py
    │   │   │       │   │   ├── sentencepiece_bpe.py
    │   │   │       │   │   ├── space_tokenizer.py
    │   │   │       │   │   ├── subword_nmt_bpe.py
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── fairseq_dataset.py
    │   │   │       │   ├── guided_language_pair_dataset.py
    │   │   │       │   ├── id_dataset.py
    │   │   │       │   ├── indexed_dataset.py
    │   │   │       │   ├── iterators.py
    │   │   │       │   ├── language_pair_dataset.py
    │   │   │       │   ├── legacy
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── block_pair_dataset.py
    │   │   │       │   │   ├── masked_lm_dataset.py
    │   │   │       │   │   └── masked_lm_dictionary.py
    │   │   │       │   ├── list_dataset.py
    │   │   │       │   ├── lm_context_window_dataset.py
    │   │   │       │   ├── lru_cache_dataset.py
    │   │   │       │   ├── mask_tokens_dataset.py
    │   │   │       │   ├── monolingual_dataset.py
    │   │   │       │   ├── multi_corpus_sampled_dataset.py
    │   │   │       │   ├── nested_dictionary_dataset.py
    │   │   │       │   ├── noising.py
    │   │   │       │   ├── num_samples_dataset.py
    │   │   │       │   ├── numel_dataset.py
    │   │   │       │   ├── offset_tokens_dataset.py
    │   │   │       │   ├── pad_dataset.py
    │   │   │       │   ├── plasma_utils.py
    │   │   │       │   ├── prepend_dataset.py
    │   │   │       │   ├── prepend_token_dataset.py
    │   │   │       │   ├── raw_label_dataset.py
    │   │   │       │   ├── replace_dataset.py
    │   │   │       │   ├── resampling_dataset.py
    │   │   │       │   ├── roll_dataset.py
    │   │   │       │   ├── round_robin_zip_datasets.py
    │   │   │       │   ├── sharded_dataset.py
    │   │   │       │   ├── sort_dataset.py
    │   │   │       │   ├── strip_token_dataset.py
    │   │   │       │   ├── subsample_dataset.py
    │   │   │       │   ├── token_block_dataset.py
    │   │   │       │   ├── token_block_utils_fast.cpp
    │   │   │       │   ├── token_block_utils_fast.pyx
    │   │   │       │   ├── transform_eos_dataset.py
    │   │   │       │   ├── transform_eos_lang_pair_dataset.py
    │   │   │       │   └── truncate_dataset.py
    │   │   │       ├── distributed_utils.py
    │   │   │       ├── file_io.py
    │   │   │       ├── file_utils.py
    │   │   │       ├── hub_utils.py
    │   │   │       ├── incremental_decoding_utils.py
    │   │   │       ├── iterative_refinement_generator.py
    │   │   │       ├── legacy_distributed_data_parallel.py
    │   │   │       ├── logging
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── meters.py
    │   │   │       │   ├── metrics.py
    │   │   │       │   └── progress_bar.py
    │   │   │       ├── model_parallel
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── criterions
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   └── vocab_parallel_cross_entropy.py
    │   │   │       │   ├── megatron_trainer.py
    │   │   │       │   ├── models
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── transformer.py
    │   │   │       │   │   └── transformer_lm.py
    │   │   │       │   └── modules
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── multihead_attention.py
    │   │   │       │   │   └── transformer_layer.py
    │   │   │       ├── models
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── bart
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── guided_hub_interface.py
    │   │   │       │   │   ├── guided_model.py
    │   │   │       │   │   ├── hub_interface.py
    │   │   │       │   │   └── model.py
    │   │   │       │   ├── composite_encoder.py
    │   │   │       │   ├── distributed_fairseq_model.py
    │   │   │       │   ├── fairseq_decoder.py
    │   │   │       │   ├── fairseq_encoder.py
    │   │   │       │   ├── fairseq_incremental_decoder.py
    │   │   │       │   ├── fairseq_model.py
    │   │   │       │   ├── fconv.py
    │   │   │       │   ├── fconv_lm.py
    │   │   │       │   ├── fconv_self_att.py
    │   │   │       │   ├── guided_transformer.py
    │   │   │       │   ├── huggingface
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   └── hf_gpt2.py
    │   │   │       │   ├── lightconv.py
    │   │   │       │   ├── lightconv_lm.py
    │   │   │       │   ├── lstm.py
    │   │   │       │   ├── lstm_lm.py
    │   │   │       │   ├── masked_lm.py
    │   │   │       │   ├── model_utils.py
    │   │   │       │   ├── multilingual_transformer.py
    │   │   │       │   ├── nat
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── cmlm_transformer.py
    │   │   │       │   │   ├── fairseq_nat_model.py
    │   │   │       │   │   ├── insertion_transformer.py
    │   │   │       │   │   ├── iterative_nonautoregressive_transformer.py
    │   │   │       │   │   ├── levenshtein_transformer.py
    │   │   │       │   │   ├── levenshtein_utils.py
    │   │   │       │   │   ├── nat_crf_transformer.py
    │   │   │       │   │   ├── nonautoregressive_ensembles.py
    │   │   │       │   │   └── nonautoregressive_transformer.py
    │   │   │       │   ├── roberta
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── alignment_utils.py
    │   │   │       │   │   ├── hub_interface.py
    │   │   │       │   │   ├── model.py
    │   │   │       │   │   ├── model_camembert.py
    │   │   │       │   │   └── model_xlmr.py
    │   │   │       │   ├── transformer.py
    │   │   │       │   ├── transformer_from_pretrained_xlm.py
    │   │   │       │   ├── transformer_lm.py
    │   │   │       │   └── wav2vec.py
    │   │   │       ├── modules
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── adaptive_input.py
    │   │   │       │   ├── adaptive_softmax.py
    │   │   │       │   ├── beamable_mm.py
    │   │   │       │   ├── character_token_embedder.py
    │   │   │       │   ├── conv_tbc.py
    │   │   │       │   ├── cross_entropy.py
    │   │   │       │   ├── cuda_utils.cu
    │   │   │       │   ├── downsampled_multihead_attention.py
    │   │   │       │   ├── dynamic_convolution.py
    │   │   │       │   ├── dynamic_crf_layer.py
    │   │   │       │   ├── dynamicconv_layer
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── cuda_function_gen.py
    │   │   │       │   │   ├── dynamicconv_cuda.cpp
    │   │   │       │   │   ├── dynamicconv_cuda.cuh
    │   │   │       │   │   ├── dynamicconv_cuda_kernel.cu
    │   │   │       │   │   ├── dynamicconv_layer.py
    │   │   │       │   │   ├── dynamiconv_cpu.cpp
    │   │   │       │   │   └── setup.py
    │   │   │       │   ├── fp32_group_norm.py
    │   │   │       │   ├── gelu.py
    │   │   │       │   ├── grad_multiply.py
    │   │   │       │   ├── guided_transformer_layer.py
    │   │   │       │   ├── gumbel_vector_quantizer.py
    │   │   │       │   ├── kmeans_vector_quantizer.py
    │   │   │       │   ├── layer_norm.py
    │   │   │       │   ├── learned_positional_embedding.py
    │   │   │       │   ├── lightconv_layer
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── cuda_function_gen.py
    │   │   │       │   │   ├── lightconv_cuda.cpp
    │   │   │       │   │   ├── lightconv_cuda.cuh
    │   │   │       │   │   ├── lightconv_cuda_kernel.cu
    │   │   │       │   │   ├── lightconv_layer.py
    │   │   │       │   │   └── setup.py
    │   │   │       │   ├── lightweight_convolution.py
    │   │   │       │   ├── linearized_convolution.py
    │   │   │       │   ├── multihead_attention.py
    │   │   │       │   ├── positional_embedding.py
    │   │   │       │   ├── scalar_bias.py
    │   │   │       │   ├── sinusoidal_positional_embedding.py
    │   │   │       │   ├── sparse_multihead_attention.py
    │   │   │       │   ├── sparse_transformer_sentence_encoder.py
    │   │   │       │   ├── sparse_transformer_sentence_encoder_layer.py
    │   │   │       │   ├── transformer_layer.py
    │   │   │       │   ├── transformer_sentence_encoder.py
    │   │   │       │   ├── transformer_sentence_encoder_layer.py
    │   │   │       │   ├── unfold.py
    │   │   │       │   └── vggblock.py
    │   │   │       ├── nan_detector.py
    │   │   │       ├── optim
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── adadelta.py
    │   │   │       │   ├── adafactor.py
    │   │   │       │   ├── adagrad.py
    │   │   │       │   ├── adam.py
    │   │   │       │   ├── adamax.py
    │   │   │       │   ├── bmuf.py
    │   │   │       │   ├── fairseq_optimizer.py
    │   │   │       │   ├── fp16_optimizer.py
    │   │   │       │   ├── fused_adam.py
    │   │   │       │   ├── fused_lamb.py
    │   │   │       │   ├── lr_scheduler
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── cosine_lr_scheduler.py
    │   │   │       │   │   ├── fairseq_lr_scheduler.py
    │   │   │       │   │   ├── fixed_schedule.py
    │   │   │       │   │   ├── inverse_square_root_schedule.py
    │   │   │       │   │   ├── polynomial_decay_schedule.py
    │   │   │       │   │   ├── reduce_lr_on_plateau.py
    │   │   │       │   │   ├── tri_stage_lr_scheduler.py
    │   │   │       │   │   └── triangular_lr_scheduler.py
    │   │   │       │   ├── nag.py
    │   │   │       │   └── sgd.py
    │   │   │       ├── options.py
    │   │   │       ├── pdb.py
    │   │   │       ├── registry.py
    │   │   │       ├── search.py
    │   │   │       ├── sequence_generator.py
    │   │   │       ├── sequence_scorer.py
    │   │   │       ├── tasks
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── audio_pretraining.py
    │   │   │       │   ├── cross_lingual_lm.py
    │   │   │       │   ├── denoising.py
    │   │   │       │   ├── fairseq_task.py
    │   │   │       │   ├── guided_translation.py
    │   │   │       │   ├── language_modeling.py
    │   │   │       │   ├── legacy_masked_lm.py
    │   │   │       │   ├── masked_lm.py
    │   │   │       │   ├── multilingual_denoising.py
    │   │   │       │   ├── multilingual_masked_lm.py
    │   │   │       │   ├── multilingual_translation.py
    │   │   │       │   ├── semisupervised_translation.py
    │   │   │       │   ├── sentence_prediction.py
    │   │   │       │   ├── sentence_ranking.py
    │   │   │       │   ├── translation.py
    │   │   │       │   ├── translation_from_pretrained_bart.py
    │   │   │       │   ├── translation_from_pretrained_xlm.py
    │   │   │       │   └── translation_lev.py
    │   │   │       ├── tokenizer.py
    │   │   │       ├── trainer.py
    │   │   │       └── utils.py
    │   │   │   ├── fairseq_cli
    │   │   │       ├── __init__.py
    │   │   │       ├── generate.py
    │   │   │       ├── guided_preprocess.py
    │   │   │       ├── interactive.py
    │   │   │       ├── preprocess.py
    │   │   │       ├── score.py
    │   │   │       ├── train.py
    │   │   │       └── validate.py
    │   │   │   ├── generate.py
    │   │   │   ├── hubconf.py
    │   │   │   ├── interactive.py
    │   │   │   ├── preprocess.py
    │   │   │   ├── requirements.txt
    │   │   │   ├── score.py
    │   │   │   ├── setup.py
    │   │   │   ├── test.py
    │   │   │   ├── train.py
    │   │   │   ├── validate.py
    │   │   │   ├── z_bin.sh
    │   │   │   ├── z_bpe.sh
    │   │   │   ├── z_test.py
    │   │   │   ├── z_test.sh
    │   │   │   └── z_train.sh
    │   ├── model_setup.py
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
    ├── lobart
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── localattn.py
    │   ├── model_setup.py
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
    ├── longformer2roberta
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   └── __init__.py
    │   └── sw-plugin-config.yaml
    ├── neuralsum
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   ├── __init__.py
    │   │   ├── summarizer.py
    │   │   └── transformer_summarizer.py
    │   └── sw-plugin-config.yaml
    ├── newspaper3k
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer
    │   │   └── __init__.py
    │   └── sw-plugin-config.yaml
    ├── pmisum
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── gpt2.py
    │   ├── model_setup.py
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
    ├── positionrank
    │   ├── model_setup.py
    │   ├── requirements.txt
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
    ├── schnitsum
    │   ├── Pipfile
    │   ├── Pipfile.lock
    │   ├── model_setup.py
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
    ├── textrank
    │   ├── model_setup.py
    │   ├── requirements.txt
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
    └── topicrank
    │   ├── model_setup.py
    │   ├── requirements.txt
    │   ├── summarizer.py
    │   └── sw-plugin-config.yaml
├── summary-workbench.py
├── sw-config.yaml
├── templates
    ├── docker
    │   ├── api.yaml
    │   ├── frontend.yaml
    │   ├── grobid.yaml
    │   └── mongo.yaml
    └── kubernetes
    │   ├── basic
    │       ├── api.yaml
    │       ├── frontend.yaml
    │       ├── grobid.yaml
    │       ├── ingress.yaml
    │       ├── mongodb.yaml
    │       └── proxy.yaml
    │   ├── plugin.yaml
    │   ├── token_secrets.yaml
    │   └── volumes.yaml
└── version.json


/.github/dependabot.yml:
--------------------------------------------------------------------------------
# No automatic version updates because there is no CI to verify them.
version: 2
updates: []
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright © 2021 Dominik Schwabe
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the "Software"),
 7 | to deal in the Software without restriction, including without limitation
 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
 9 | and/or sell copies of the Software, and to permit persons to whom the
10 | Software is furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included
13 | in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
21 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Summary Workbench
 2 | 
 3 | Unifying the application and evaluation of text summarization models. [[Paper](https://arxiv.org/pdf/2210.09587.pdf)] [[Documentation](https://webis-de.github.io/summary-workbench/)] [[Live Demo](https://tldr.demo.webis.de)]
 4 | >Accepted at EMNLP 2022 (Demo track). :tada: :tada:
 5 | 
 6 | #### :loudspeaker: Updates (3-12-2022) 
 7 | 1. Integrated 2 new models and their variants (6 in total): [BRIO](https://arxiv.org/abs/2203.16804) trained on news, [Schnitsum](https://github.com/sobamchan/schnitsum) trained on scholarly documents.
 8 | 2. Integrated [contrastive search](https://huggingface.co/blog/introducing-csearch) for more fluent summaries. Users can now toggle between regular and contrastive search for supported models.
 9 | 3. Improvements to the UI responsiveness on smaller devices.
10 | 
11 | ## Summarize
12 | 
13 | ### Create a Request
14 | 
15 | ![Create a Request](docs/static/summarize_input.gif)
16 | 
17 | ### Inspect the Results
18 | 
19 | ![Inspect the Results](docs/static/summarize_usage.gif)
20 | 
21 | ## Evaluate
22 | 
23 | ### Create a Request
24 | 
25 | ![Create a Request](docs/static/evaluation_input.gif)
26 | 
27 | ### Inspect the Results
28 | 
29 | #### Scores
30 | 
31 | ![Scores](docs/static/evaluation_scores.gif)
32 | 
33 | #### Visualize Text Examples
34 | 
35 | ![Visualize Text Examples](docs/static/evaluation_visualization.gif)
36 | 
37 | #### Plot Scores against each other
38 | 
39 | ![Plot Scores against each other](docs/static/evaluation_plotter.gif)
40 | 


--------------------------------------------------------------------------------
/api/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10
 2 | 
 3 | COPY . /app
 4 | WORKDIR /app
 5 | RUN pip install pipenv && pipenv install --system
 6 | RUN python model_setup.py
 7 | 
 8 | CMD uvicorn app:app --host 0.0.0.0 --port 5000
 9 | 
10 | 
11 | # vi: ft=dockerfile
12 | 


--------------------------------------------------------------------------------
/api/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | doc2json = {git = "https://github.com/allenai/s2orc-doc2json"}
 8 | newspaper3k = "*"
 9 | nltk = "*"
10 | beautifulsoup4 = "*"
11 | boto3 = "*"
12 | aiohttp = "*"
13 | lxml = "*"
14 | python-multipart = "*"
15 | python-magic = "*"
16 | latex2mathml = "*"
17 | fastapi = "*"
18 | uvicorn = "*"
19 | spacy = "*"
20 | pymongo = {extras = ["srv"], version = "*"}
21 | 
22 | [dev-packages]
23 | 
24 | [requires]
25 | python_version = "3.11"
26 | 


--------------------------------------------------------------------------------
/api/boot.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Development entry point: ensure a virtualenv that matches the current Python
 4 | # exists, install the dependencies into it, fetch the models and start uvicorn
 5 | # with auto-reload.
 6 | 
 7 | export VIRTUAL_ENV=/root/.venv
 8 | 
 9 | # Reuse the virtualenv only when the version recorded in its pyvenv.cfg equals
10 | # the version reported by `python --version`.
11 | VENV_EXISTS=false
12 | if [[ -r "$VIRTUAL_ENV/pyvenv.cfg" ]]; then
13 |   VENV_PYTHON_VERSION=$(sed -n '/^version/ s/[^0-9.]//g p' "$VIRTUAL_ENV/pyvenv.cfg")
14 |   GLOBAL_PYTHON_VERSION=$(python --version | sed -n 's/[^0-9.]//g p')
15 |   # Quoted right-hand side: compare literally instead of as a glob pattern.
16 |   if [[ "$VENV_PYTHON_VERSION" == "$GLOBAL_PYTHON_VERSION" ]]; then
17 |     VENV_EXISTS=true
18 |   fi
19 | fi
20 | 
21 | if [[ $VENV_EXISTS == "false" ]]; then
22 |   echo "no valid virtualenv found, creating ..."
23 |   rm -rf "$VIRTUAL_ENV"
24 |   python -m venv "$VIRTUAL_ENV" || exit 1
25 | fi
26 | 
27 | source "$VIRTUAL_ENV/bin/activate" || exit 1
28 | 
29 | cd /app || exit 1
30 | 
31 | pip install pipenv || exit 1
32 | pipenv install || exit 1
33 | 
34 | # Not guarded with `|| exit 1`: the server is started even if this step fails.
35 | python model_setup.py
36 | # exec replaces the shell so uvicorn receives signals (e.g. SIGTERM) directly.
37 | exec uvicorn app:app --app-dir /app --host 0.0.0.0 --port 5000 --reload
38 | 


--------------------------------------------------------------------------------
/api/model_setup.py:
--------------------------------------------------------------------------------
 1 | import nltk
 2 | import spacy
 3 | 
 4 | SPACY_MODEL = "en_core_web_md"
 5 | 
 6 | 
 7 | def setup():
 8 |     spacy.cli.download(SPACY_MODEL)
 9 |     nltk.download("punkt")
10 | 
11 | 
12 | if __name__ == "__main__":
13 |     setup()
14 | 


--------------------------------------------------------------------------------
/api/utils/abort.py:
--------------------------------------------------------------------------------
1 | from sys import stderr
2 | 
3 | 
4 | def aborter(message):
5 |     if not hasattr(aborter, "was_aborted"):
6 |         aborter.was_aborted = True
7 |         print(message, file=stderr)
8 |         exit(1)
9 | 


--------------------------------------------------------------------------------
/api/utils/aio.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from functools import wraps
 3 | 
 4 | 
 5 | async def wait_first(coros):
 6 |     futures = [asyncio.ensure_future(c) for c in coros]
 7 |     try:
 8 |         done, _ = await asyncio.wait(futures, return_when=asyncio.FIRST_COMPLETED)
 9 |         done_future = done.pop()
10 |         done_coro = coros[futures.index(done_future)]
11 |         return done_future.result(), done_coro
12 |     finally:
13 |         for future in futures:
14 |             future.cancel()
15 |         for future in futures:
16 |             try:
17 |                 await future
18 |             except:
19 |                 pass
20 | 
21 | 
22 | async def until_true(func, interval=1):
23 |     while not await func():
24 |         await asyncio.sleep(interval)
25 | 
26 | 
27 | async def finish_or_condition(wrapped_coro, condition_func, interval=1):
28 |     result, coro = await wait_first(
29 |         [wrapped_coro, until_true(condition_func, interval=interval)]
30 |     )
31 |     if coro is wrapped_coro:
32 |         return result
33 |     return None
34 | 
35 | 
36 | def to_future(func):
37 |     @wraps(func)
38 |     def wrapper(*args, **kwargs):
39 |         return asyncio.ensure_future(func(*args, **kwargs))
40 | 
41 |     return wrapper
42 | 
43 | 
44 | def to_threaded(func):
45 |     @wraps(func)
46 |     async def wrapper(*args, **kwargs):
47 |         return await asyncio.to_thread(func, *args, **kwargs)
48 | 
49 |     return wrapper
50 | 


--------------------------------------------------------------------------------
/api/utils/article_download.py:
--------------------------------------------------------------------------------
 1 | from newspaper import Article
 2 | from utils.aio import to_threaded
 3 | 
 4 | 
 5 | @to_threaded
 6 | def download_article(url):
 7 |     article = Article(url)
 8 |     article.download()
 9 |     article.parse()
10 |     return {"text": article.text, "title": article.title}
11 | 


--------------------------------------------------------------------------------
/api/utils/cancel.py:
--------------------------------------------------------------------------------
 1 | from functools import wraps
 2 | 
 3 | from fastapi import Response
 4 | from utils.aio import finish_or_condition
 5 | 
 6 | 
 7 | def disconnect_checker(request):
 8 |     async def checker():
 9 |         return await request.is_disconnected()
10 | 
11 |     return checker
12 | 
13 | 
14 | def cancel_on_disconnect(func):
15 |     @wraps(func)
16 |     async def wrapper(request, *args, **kwargs):
17 |         result = await finish_or_condition(
18 |             func(request, *args, **kwargs), disconnect_checker(request)
19 |         )
20 |         if result is None:
21 |             return Response(status_code=204)
22 |         return result
23 | 
24 |     return wrapper
25 | 


--------------------------------------------------------------------------------
/api/utils/request.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | import aiohttp
 4 | from utils.aio import wait_first
 5 | 
 6 | 
 7 | async def _fetch(session, parse_as_json=True, **kwargs):
 8 |     async with session.request(**kwargs) as response:
 9 |         if parse_as_json:
10 |             return await response.json()
11 |         return await response.text()
12 | 
13 | 
14 | timeout = aiohttp.ClientTimeout(total=None, connect=10, sock_connect=10, sock_read=None)
15 | 
16 | 
17 | async def request(request_data, cancel_event=None, return_exceptions=True):
18 |     async with aiohttp.ClientSession(timeout=timeout) as session:
19 |         requests = []
20 |         for data in request_data:
21 |             data = data.copy()
22 |             method = data.get("method")
23 |             if method is None:
24 |                 data["method"] = "GET" if data.get("json") is None else "POST"
25 |             requests.append(_fetch(session, **data))
26 |         gather_coro = asyncio.gather(*requests, return_exceptions=return_exceptions)
27 |         coros = [gather_coro]
28 |         if cancel_event is not None:
29 |             coros.append(cancel_event.wait())
30 |         result, coro = await wait_first(coros)
31 |         if coro is gather_coro:
32 |             return result
33 |         raise asyncio.CancelledError()
34 | 


--------------------------------------------------------------------------------
/api/utils/semantic.py:
--------------------------------------------------------------------------------
 1 | import spacy
 2 | from model_setup import SPACY_MODEL
 3 | from utils.aio import to_threaded
 4 | 
 5 | 
 6 | class SemanticSimilarity:
 7 |     def __init__(self):
 8 |         self.nlp = spacy.load(SPACY_MODEL)
 9 | 
10 |     def _get_sentences(self, text):
11 |         if isinstance(text, str):
12 |             text = [s for s in self.nlp(text).sents if any(t.is_alpha for t in s)]
13 |         else:
14 |             text = [self.nlp(s) for s in text]
15 |         return text
16 | 
17 |     def evaluate(self, document, summary):
18 |         document_sents = self._get_sentences(document)
19 |         summary_sents = self._get_sentences(summary)
20 |         return {
21 |             "documentSentences": [doc_sent.text_with_ws for doc_sent in document_sents],
22 |             "summarySentences": [sum_sent.text_with_ws for sum_sent in summary_sents],
23 |             "scores": [
24 |                 [doc_sent.similarity(sum_sent) for doc_sent in document_sents]
25 |                 for sum_sent in summary_sents
26 |             ],
27 |         }
28 | 
29 | 
30 | evaluator = SemanticSimilarity()
31 | 
32 | 
33 | @to_threaded
34 | def semantic_similarity(sentences, summary):
35 |     return evaluator.evaluate(sentences, summary)
36 | 


--------------------------------------------------------------------------------
/api/utils/sentence.py:
--------------------------------------------------------------------------------
1 | import nltk
2 | from utils.aio import to_threaded
3 | 
4 | 
5 | @to_threaded
6 | def sentence_split(text):
7 |     return nltk.sent_tokenize(text)
8 | 


--------------------------------------------------------------------------------
/cli/config.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | DEPLOY_PATH = Path("./deploy")
 4 | KUBERNETES_TEMPLATES_PATH = Path("./templates/kubernetes")
 5 | DOCKER_TEMPLATES_PATH = Path("./templates/docker")
 6 | PLUGIN_CONFIG_PATH = Path("./plugin_config/plugin_config.json")
 7 | DOCKER_COMPOSE_YAML_PATH = Path("./docker-compose.yaml")
 8 | PLUGIN_DOCKERFILE_PATH = Path("./docker/Dockerfile.plugin")
 9 | CONTAINER_PLUGIN_FILES_PATH = Path("/summary_workbench_plugin_files")
10 | CONTAINER_PLUGIN_SERVER_PATH = Path("/summary_workbench_plugin_server")
11 | DEV_BOOT_PATH = CONTAINER_PLUGIN_SERVER_PATH / "dev.boot.sh"
12 | PLUGIN_SERVER_PATH = Path("./plugin_server").absolute()
13 | REMOTE_PLUGIN_FOLDER = Path("~/.summary_workbench_plugins").expanduser()
14 | REQUIRED_FILE_GROUPS = [{"Pipfile.lock", "Pipfile", "requirements.txt"}]
15 | SCHEMA_FOLDER = Path("./schema")
16 | DEFAULT_CONFIG = "sw-config.yaml"
17 | DEFAULT_PLUGIN_CONFIG = "sw-plugin-config.yaml"
18 | DEFAULTS = {}
19 | 
20 | SETUP_SERVER_FILES_DOCKER_FILE = f"""
21 | WORKDIR {CONTAINER_PLUGIN_SERVER_PATH}
22 | COPY . .
23 | RUN pip install -r requirements.txt
24 | """
25 | 
26 | SETUP_PLUGIN_FILES_DOCKER_FILE = f"""
27 | {{environment}}
28 | WORKDIR {CONTAINER_PLUGIN_FILES_PATH}
29 | COPY . .
30 | RUN if [ -f Pipfile.lock -o -f Pipfile ]; then pip install pipenv && pipenv install --system; else pip install -r requirements.txt; fi
31 | RUN python model_setup.py
32 | WORKDIR {CONTAINER_PLUGIN_FILES_PATH}
33 | CMD ["uvicorn", "--app-dir", "/summary_workbench_plugin_server", "app:app", "--host", "0.0.0.0", "--port", "5000"]
34 | """
35 | 


--------------------------------------------------------------------------------
/cli/git_interface.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import git
 4 | import giturlparse
 5 | from termcolor import colored
 6 | 
 7 | from .config import REMOTE_PLUGIN_FOLDER
 8 | from .exceptions import InvalidGitLinkError
 9 | 
10 | 
11 | def is_github_link(source):
12 |     return giturlparse.validate(source)
13 | 
14 | 
15 | def from_github(source):
16 |     p = giturlparse.parse(source)
17 |     plugin_name = f"{p.platform}---{p.owner}---{p.repo}---{p.branch}"
18 |     plugin_path = REMOTE_PLUGIN_FOLDER / plugin_name
19 |     if not plugin_path.exists():
20 |         print(f"cloning {colored(source, 'green')} to {colored(plugin_path, 'green')}")
21 |         print("if a login prompt shows up, the url might be wrong")
22 |         plugin_path.parent.mkdir(exist_ok=True, parents=True)
23 |         git.Repo.clone_from(source, plugin_path)
24 |     return plugin_path, p.owner
25 | 
26 | def pull(path):
27 |     print(f"pulling {colored(path, 'green')}")
28 |     git.Repo(path).remotes[0].pull()
29 | 
30 | def resolve_source(source):
31 |     if isinstance(source, Path):
32 |         return source.absolute(), None
33 |     if not is_github_link(source):
34 |         raise InvalidGitLinkError("the link is invalid", source)
35 |     return from_github(source)
36 | 


--------------------------------------------------------------------------------
/configure.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | # Executable wrapper: runs the `main` function of the `cli` package.
4 | from cli import main
5 | 
6 | if __name__ == "__main__":
7 |     main()
8 | 


--------------------------------------------------------------------------------
/debug_plugin/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | ipython = "*"
10 | ipdb = "*"
11 | 
12 | [requires]
13 | python_version = "3.10"
14 | 


--------------------------------------------------------------------------------
/debug_plugin/model_setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/debug_plugin/model_setup.py


--------------------------------------------------------------------------------
/debug_plugin/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: Debug
3 | metadata: {}
4 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.plugin:
--------------------------------------------------------------------------------
1 | ARG python_version=3.10
2 | 
3 | FROM python:${python_version}
4 | 
5 | RUN apt update && apt --yes install default-jre-headless
6 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dependencies
 2 | /node_modules
 3 | 
 4 | # Production
 5 | /build
 6 | 
 7 | # Generated files
 8 | .docusaurus
 9 | .cache-loader
10 | 
11 | # Misc
12 | .DS_Store
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 | 
18 | npm-debug.log*
19 | yarn-debug.log*
20 | yarn-error.log*
21 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # Website
 2 | 
 3 | This website is built using [Docusaurus 2](https://docusaurus.io/), a modern static website generator.
 4 | 
 5 | ### Installation
 6 | 
 7 | ```
 8 | $ yarn
 9 | ```
10 | 
11 | ### Local Development
12 | 
13 | ```
14 | $ yarn start
15 | ```
16 | 
17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
18 | 
19 | ### Build
20 | 
21 | ```
22 | $ yarn build
23 | ```
24 | 
25 | This command generates static content into the `build` directory, which can be served using any static content hosting service.
26 | 
27 | ### Deployment
28 | 
29 | Using SSH:
30 | 
31 | ```
32 | $ USE_SSH=true yarn deploy
33 | ```
34 | 
35 | Not using SSH:
36 | 
37 | ```
38 | $ GIT_USER=<Your GitHub username> yarn deploy
39 | ```
40 | 
41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
42 | 


--------------------------------------------------------------------------------
/docs/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
3 | };
4 | 


--------------------------------------------------------------------------------
/docs/docs/api-documentation.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Api Documentation
 3 | sidebar_position: 5
 4 | ---
 5 | 
 6 | The API documentation can be found under `/api/docs` (e.g. <https://tldr.demo.webis.de/api/docs>) and `/api/redoc` (e.g. <https://tldr.demo.webis.de/api/redoc>).  
 7 | 
 8 | The script `summary-workbench.py`, which can be found in the root of the repository, can be used to access the application from the command line.
 9 | It can also be imported in Python files to build applications based on Summary Workbench.
10 | 


--------------------------------------------------------------------------------
/docs/docs/deployment.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Deployment
 3 | sidebar_position: 4
 4 | ---
 5 | 
 6 | The deployment of the application is not generic and is tailored to our custom Kubernetes cluster.
 7 | 
 8 | 1. Add the following to your `sw-config.yaml`:
 9 | 
10 | ```yaml title=sw-config.yaml
11 | docker_username: <your-dockerhub-username>
12 | deploy:
13 |   host: <host-where-the-application-is-exposed>
14 |   resources: # this option can be omitted
15 |     requests:
16 |       cpu: "500m"
17 |     limits:
18 |       cpu: "4000m"
19 | ```
20 | 
21 | :::note
22 | 
23 | The `resources` field follows exactly the format described in <https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/>.  
24 | It is used to limit the resources of the deployed containers.
25 | 
26 | :::
27 | 
28 | 2. Log in to your Docker Hub account.
29 | 3. Build the necessary images and push them to Docker Hub with `./configure.py build --all` and `./configure.py push --all`.
30 | 4. Run `./configure.py gen-kubernetes` to generate the deployment files under `deploy/`.
31 | 5. Use `kubectl` to deploy the application (e.g. `kubectl apply -f deploy/`).
32 | 


--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "docs",
 3 |   "version": "0.0.0",
 4 |   "private": true,
 5 |   "scripts": {
 6 |     "docusaurus": "docusaurus",
 7 |     "start": "docusaurus start",
 8 |     "build": "docusaurus build",
 9 |     "swizzle": "docusaurus swizzle",
10 |     "deploy": "docusaurus deploy",
11 |     "clear": "docusaurus clear",
12 |     "serve": "docusaurus serve",
13 |     "write-translations": "docusaurus write-translations",
14 |     "write-heading-ids": "docusaurus write-heading-ids"
15 |   },
16 |   "dependencies": {
17 |     "@docusaurus/core": "^2.2.0",
18 |     "@docusaurus/preset-classic": "^2.2.0"
19 |   },
20 |   "browserslist": {
21 |     "production": [
22 |       ">0.5%",
23 |       "not dead",
24 |       "not op_mini all"
25 |     ],
26 |     "development": [
27 |       "last 1 chrome version",
28 |       "last 1 firefox version",
29 |       "last 1 safari version"
30 |     ]
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/docs/sidebars.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Creating a sidebar enables you to:
 3 |  - create an ordered group of docs
 4 |  - render a sidebar for each doc of that group
 5 |  - provide next/previous navigation
 6 | 
 7 |  The sidebars can be generated from the filesystem, or explicitly defined here.
 8 | 
 9 |  Create as many sidebars as you want.
10 |  */
11 | 
12 | // @ts-check
13 | 
14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */
15 | const sidebars = {
16 |   // By default, Docusaurus generates a sidebar from the docs folder structure
17 |   tutorialSidebar: [{type: 'autogenerated', dirName: '.'}],
18 | 
19 |   // But you can create a sidebar manually
20 |   /*
21 |   tutorialSidebar: [
22 |     {
23 |       type: 'category',
24 |       label: 'Tutorial',
25 |       items: ['hello'],
26 |     },
27 |   ],
28 |    */
29 | };
30 | 
31 | module.exports = sidebars;
32 | 


--------------------------------------------------------------------------------
/docs/src/css/custom.css:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Any CSS included here will be global. The classic template
 3 |  * bundles Infima by default. Infima is a CSS framework designed to
 4 |  * work well for content-centric websites.
 5 |  */
 6 | 
 7 | /* You can override the default Infima variables here. */
 8 | :root {
 9 |   --ifm-color-primary: #2e8555;
10 |   --ifm-color-primary-dark: #29784c;
11 |   --ifm-color-primary-darker: #277148;
12 |   --ifm-color-primary-darkest: #205d3b;
13 |   --ifm-color-primary-light: #33925d;
14 |   --ifm-color-primary-lighter: #359962;
15 |   --ifm-color-primary-lightest: #3cad6e;
16 |   --ifm-code-font-size: 95%;
17 | }
18 | 
19 | /* For readability concerns, you should choose a lighter palette in dark mode. */
20 | [data-theme='dark'] {
21 |   --ifm-color-primary: #25c2a0;
22 |   --ifm-color-primary-dark: #21af90;
23 |   --ifm-color-primary-darker: #1fa588;
24 |   --ifm-color-primary-darkest: #1a8870;
25 |   --ifm-color-primary-light: #29d5b0;
26 |   --ifm-color-primary-lighter: #32d8b4;
27 |   --ifm-color-primary-lightest: #4fddbf;
28 | }
29 | 
30 | .docusaurus-highlight-code-line {
31 |   background-color: rgba(0, 0, 0, 0.1);
32 |   display: block;
33 |   margin: 0 calc(-1 * var(--ifm-pre-padding));
34 |   padding: 0 var(--ifm-pre-padding);
35 | }
36 | 
37 | [data-theme='dark'] .docusaurus-highlight-code-line {
38 |   background-color: rgba(0, 0, 0, 0.3);
39 | }
40 | 


--------------------------------------------------------------------------------
/docs/src/pages/index.module.css:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * CSS files with the .module.css suffix will be treated as CSS modules
 3 |  * and scoped locally.
 4 |  */
 5 | 
 6 | .heroBanner {
 7 |   padding: 4rem 0;
 8 |   text-align: center;
 9 |   position: relative;
10 |   overflow: hidden;
11 | }
12 | 
13 | @media screen and (max-width: 996px) {
14 |   .heroBanner {
15 |     padding: 2rem;
16 |   }
17 | }
18 | 
19 | .buttons {
20 |   display: flex;
21 |   align-items: center;
22 |   justify-content: center;
23 | }
24 | 


--------------------------------------------------------------------------------
/docs/static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/.nojekyll


--------------------------------------------------------------------------------
/docs/static/evaluation_input.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/evaluation_input.gif


--------------------------------------------------------------------------------
/docs/static/evaluation_plotter.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/evaluation_plotter.gif


--------------------------------------------------------------------------------
/docs/static/evaluation_scores.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/evaluation_scores.gif


--------------------------------------------------------------------------------
/docs/static/evaluation_visualization.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/evaluation_visualization.gif


--------------------------------------------------------------------------------
/docs/static/img/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/img/favicon.png


--------------------------------------------------------------------------------
/docs/static/summarize_input.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/summarize_input.gif


--------------------------------------------------------------------------------
/docs/static/summarize_pdf_extract.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/summarize_pdf_extract.gif


--------------------------------------------------------------------------------
/docs/static/summarize_usage.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/docs/static/summarize_usage.gif


--------------------------------------------------------------------------------
/frontend/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | 


--------------------------------------------------------------------------------
/frontend/.eslintrc.yml:
--------------------------------------------------------------------------------
 1 | env:
 2 |   browser: true
 3 |   node: true
 4 | extends:
 5 |   - airbnb
 6 |   - prettier
 7 |   - eslint:recommended
 8 |   - plugin:react/recommended
 9 |   - plugin:react-hooks/recommended
10 | parser: "@babel/eslint-parser"
11 | parserOptions:
12 |   requireConfigFile: false
13 |   ecmaFeatures:
14 |     jsx: true
15 |   ecmaVersion: 12
16 |   sourceType: module
17 |   babelOptions:
18 |     presets:
19 |       - "@babel/preset-react"
20 | settings:
21 |   react:
22 |     version: detect
23 | plugins:
24 |   - prettier
25 |   - react
26 |   - react-hooks
27 |   - simple-import-sort
28 |   - import
29 |   - promise
30 | rules:
31 |   react/function-component-definition: 0
32 |   react/jsx-curly-brace-presence: 0
33 |   react/react-in-jsx-scope: 0
34 |   react/jsx-filename-extension: 0
35 |   react/no-array-index-key: 0
36 |   react/prop-types: 0
37 |   import/prefer-default-export: 0
38 |   react/button-has-type: 0
39 |   no-underscore-dangle: 0
40 |   react/jsx-props-no-spreading: 0
41 |   no-plusplus: 0
42 |   react/display-name: 0
43 |   jsx-a11y/anchor-has-content: 0
44 |   jsx-a11y/control-has-associated-label: 0
45 |   jsx-a11y/label-has-associated-control: 0
46 |   jsx-a11y/no-autofocus: 0
47 |   jsx-a11y/click-events-have-key-events: 0
48 |   jsx-a11y/no-static-element-interactions: 0
49 |   no-constant-condition: 0
50 |   import/no-extraneous-dependencies: 0
51 |   no-restricted-syntax: 0
52 |   max-classes-per-file: 0
53 |   no-unused-vars: "warn"
54 | 


--------------------------------------------------------------------------------
/frontend/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "tabWidth": 2,
3 |   "printWidth": 100
4 | }
5 | 


--------------------------------------------------------------------------------
/frontend/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM node:19-slim AS build-stage
 2 | 
 3 | WORKDIR /app
 4 | # Copy only the manifests first so the dependency layer stays cached until
 5 | # package.json or package-lock.json change.
 6 | COPY package.json package-lock.json /app/
 7 | RUN npm ci
 8 | COPY . /app/
 9 | RUN npm run build
10 | 
11 | # Runtime image: nginx serving the static build output.
12 | FROM nginx:alpine
13 | COPY ./nginx.conf /etc/nginx/nginx.conf
14 | COPY --from=build-stage /app/build /srv/static
15 | 


--------------------------------------------------------------------------------
/frontend/Dockerfile.dev:
--------------------------------------------------------------------------------
1 | FROM node:19-slim
2 | 


--------------------------------------------------------------------------------
/frontend/nginx.conf:
--------------------------------------------------------------------------------
 1 | # Static file server for the built frontend.
 2 | worker_processes 1;
 3 | 
 4 | events {
 5 |     worker_connections 1024;
 6 | }
 7 | 
 8 | http {
 9 |     server {
10 |         listen 80;
11 | 
12 |         include /etc/nginx/mime.types;
13 |         default_type  application/octet-stream;
14 | 
15 |         # Declare UTF-8 and enable gzip for every MIME type.
16 |         charset utf-8;
17 |         charset_types *;
18 |         gzip on;
19 |         gzip_types *;
20 |         sendfile on;
21 | 
22 |         # Serve the requested file if it exists, otherwise fall back to
23 |         # index.html.
24 |         root /srv/static;
25 |         try_files $uri /index.html;
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | }
7 | 


--------------------------------------------------------------------------------
/frontend/src/api.js:
--------------------------------------------------------------------------------
 1 | import { get, post, wrappedFetch } from "./request";
 2 | 
 3 | const getMetricsRequest = () => get("/api/metrics");
 4 | const getSummarizersRequest = () => get("/api/summarizers");
 5 | 
 6 | const evaluateRequest = (metrics, references, hypotheses, abortController) =>
 7 |   post("/api/evaluate", { metrics, references, hypotheses }, { abortController });
 8 | 
 9 | const summarizeRequest = (documents, summarizers, ratio, bulk, abortController) => {
10 |   const data = { documents, summarizers, ratio, add_metadata: true };
11 |   if (!bulk) {
12 |     data.split_sentences = true;
13 |   }
14 |   return post("/api/summarize", data, { abortController });
15 | };
16 | 
17 | const pdfExtractRequest = async (pdf) => {
18 |   const res = await wrappedFetch("/api/pdf/extract", {
19 |     method: "POST",
20 |     body: pdf,
21 |   });
22 |   if (res.ok) {
23 |     const data = await res.json();
24 |     if (data.error) throw new Error(data.error);
25 |     return data;
26 |   }
27 |   throw new Error(`request failed with status ${res.status}`);
28 | };
29 | 
30 | const semanticRequest = async (sentences, summary) =>
31 |   post("/api/semantic_similarity", { sentences, summary });
32 | 
33 | const feedbackRequest = (summarizer, summary, reference, url, feedback) => {
34 |   let json = { summarizer, summary, reference, feedback };
35 |   if (url !== null) json = { url, ...json };
36 |   return post("/api/feedback", json);
37 | };
38 | 
39 | export {
40 |   getMetricsRequest,
41 |   getSummarizersRequest,
42 |   evaluateRequest,
43 |   summarizeRequest,
44 |   pdfExtractRequest,
45 |   feedbackRequest,
46 |   semanticRequest,
47 | };
48 | 


--------------------------------------------------------------------------------
/frontend/src/components/OneHypRef.js:
--------------------------------------------------------------------------------
 1 | import React, { useEffect, useState } from "react";
 2 | 
 3 | import { Textarea } from "./utils/Form";
 4 | import { FlexResponsive } from "./utils/Layout";
 5 | 
 6 | const TextField = ({ value, setValue, placeholder }) => (
 7 |   <Textarea
 8 |     value={value}
 9 |     onChange={(e) => setValue(e.currentTarget.value)}
10 |     rows="8"
11 |     placeholder={placeholder}
12 |   />
13 | );
14 | 
15 | const OneHypRef = ({ setComputeData }) => {
16 |   const [hypText, setHypText] = useState("");
17 |   const [refText, setRefText] = useState("");
18 | 
19 |   useEffect(() => {
20 |     const errors = [];
21 |     if (!refText) errors.push("reference text is missing");
22 |     if (!hypText) errors.push("hypothesis text is missing");
23 |     if (errors.length) {
24 |       setComputeData({ errors });
25 |       return;
26 |     }
27 |     try {
28 |       setComputeData({
29 |         data: {
30 |           id: "",
31 |           lines: [{ reference: refText, hypothesis: hypText }],
32 |         },
33 |       });
34 |     } catch (error) {
35 |       setComputeData({ errors: [error.message] });
36 |     }
37 |   }, [hypText, refText, setComputeData]);
38 | 
39 |   return (
40 |     <FlexResponsive>
41 |       <TextField value={refText} setValue={setRefText} placeholder="Enter the reference text." />
42 |       <TextField value={hypText} setValue={setHypText} placeholder="Enter the predicted text." />
43 |     </FlexResponsive>
44 |   );
45 | };
46 | 
47 | export { OneHypRef };
48 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Badge.js:
--------------------------------------------------------------------------------
 1 | import React from "react";
 2 | 
 3 | const badgeStyles = {
 4 |   fill: {
 5 |     "*": "py-[6px] px-[10px]",
 6 |     primary: "bg-blue-600 text-white",
 7 |     secondary: "bg-gray-200 text-black",
 8 |     success: "bg-green-600 text-white",
 9 |     warning: "bg-yellow-600 text-white",
10 |     danger: "bg-red-600 text-white",
11 |   },
12 |   text: {
13 |     "*": "",
14 |     primary: "text-blue-600",
15 |     secondary: "text-gray-600",
16 |     success: "text-green-600",
17 |     warning: "text-yellow-600",
18 |     danger: "text-red-600",
19 |   },
20 | };
21 | 
22 | const Badge = ({ children, appearance = "fill", variant = "primary", uppercase }) => (
23 |   <span
24 |     className={`${uppercase ? "uppercase" : ""} ${badgeStyles[appearance]["*"]} ${
25 |       badgeStyles[appearance][variant]
26 |     } whitespace-nowrap flex items-center gap-2 leading-none align-baseline text-xs font-bold rounded-full`}
27 |   >
28 |     {children}
29 |   </span>
30 | );
31 | 
32 | const BadgeGroup = ({ children }) => <div className="flex flex-wrap gap-2">{children}</div>;
33 | 
34 | export { Badge, BadgeGroup };
35 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Card.js:
--------------------------------------------------------------------------------
 1 | import React from "react";
 2 | 
 3 | const Card = ({ children, full }) => (
 4 |   <div
 5 |     className={`${
 6 |       full ? "w-full" : "max-w-sm"
 7 |     } flex h-full grow flex-col divide-slate-300 divide-y bg-slate-100 rounded-lg border border-gray-200 shadow-md`}
 8 |   >
 9 |     {children}
10 |   </div>
11 | );
12 | 
13 | const CardHead = ({ children, tight }) => (
14 |   <div
15 |     className={`${
16 |       tight ? "min-h-[50px]" : "py-4 min-h-[80px]"
17 |     } px-6 w-full flex gap-4 justify-between items-center`}
18 |   >
19 |     {children}
20 |   </div>
21 | );
22 | 
23 | const CardContent = ({ children, white, tight }) => (
24 |   <div className={`${tight ? "p-3" : "p-6"} space-y-3 flex-grow flex-col ${white ? "bg-white" : ""}`}>{children}</div>
25 | );
26 | 
27 | const CardFoot = ({ children }) => <div className="p-6">{children}</div>;
28 | 
29 | export { Card, CardHead, CardContent, CardFoot };
30 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Container.js:
--------------------------------------------------------------------------------
1 | const Container = ({ children }) => <div className="container mx-auto px-5 h-full">{children}</div>;
2 | 
3 | export { Container };
4 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Disclosure.js:
--------------------------------------------------------------------------------
 1 | import { Disclosure as HDisclosure } from "@headlessui/react";
 2 | 
 3 | const DisclosureToggle = ({ children }) => (
 4 |   <HDisclosure.Button className="w-full">{children}</HDisclosure.Button>
 5 | );
 6 | 
 7 | const DisclosureContent = ({ children }) => (
 8 |   <HDisclosure.Panel className="w-full">{children}</HDisclosure.Panel>
 9 | );
10 | 
11 | const Disclosure = ({ children }) => <HDisclosure>{children}</HDisclosure>;
12 | 
13 | export { Disclosure, DisclosureContent, DisclosureToggle };
14 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Error.js:
--------------------------------------------------------------------------------
 1 | import { HeadingMedium, Hint } from "./Text";
 2 | 
 3 | const ErrorBox = ({ children }) => (
 4 |   <div>
 5 |     <HeadingMedium>Errors</HeadingMedium>
 6 |     <div className="border p-2 max-h-[200px] overflow-y-scroll">{children}</div>
 7 |   </div>
 8 | );
 9 | 
10 | const Errors = ({ errors, nested, type = "danger" }) => {
11 |   if (Array.isArray(errors)) {
12 |     return errors.map((err, i) => <Errors key={i} errors={err} nested={nested} type={type} />);
13 |   }
14 | 
15 |   const { name, message } = errors;
16 | 
17 |   let subErrors;
18 |   if (typeof errors === "string") subErrors = errors;
19 |   else {
20 |     subErrors = errors.errors;
21 |     if (subErrors === undefined) {
22 |       if (message === undefined) throw new Error("either 'errors' or 'message' needs to be set");
23 |       subErrors = message;
24 |     }
25 |   }
26 | 
27 |   let inner;
28 |   if (typeof subErrors === "string") inner = <div>{subErrors}</div>;
29 |   else inner = <Errors errors={subErrors} nested type={type} />;
30 | 
31 |   if (name !== undefined) {
32 |     inner = (
33 |       <>
34 |         <div>{name}:</div>
35 |         <div className="ml-5">{inner}</div>
36 |       </>
37 |     );
38 |   }
39 | 
40 |   if (nested) inner = <div>{inner}</div>;
41 |   else
42 |     inner = (
43 |       <Hint type={type} small>
44 |         {inner}
45 |       </Hint>
46 |     );
47 | 
48 |   return inner;
49 | };
50 | 
51 | export { Errors, ErrorBox };
52 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/FuzzySearch.js:
--------------------------------------------------------------------------------
 1 | import Fuse from "fuse.js";
 2 | import React, { useMemo, useState } from "react";
 3 | import { FaSearch } from "react-icons/fa";
 4 | 
 5 | import { Input } from "./Form";
 6 | 
 7 | const LiveSearch = ({ query, setQuery, ...props }) => (
 8 |   <Input
 9 |     Icon={FaSearch}
10 |     placeholder="Search"
11 |     value={query}
12 |     small
13 |     onChange={(e) => setQuery(e.currentTarget.value)}
14 |     {...props}
15 |   />
16 | );
17 | 
// Hook that fuzzy-filters `keys` with Fuse. Returns the current query, its
// setter and the matching keys; an empty query yields `keys` unchanged.
const useFilter = (keys) => {
  const [query, setQuery] = useState("");
  const fuse = useMemo(() => new Fuse(keys), [keys]);
  const filteredKeys = useMemo(
    () => (query ? fuse.search(query).map((hit) => keys[hit.refIndex]) : keys),
    [fuse, keys, query]
  );
  return { query, setQuery, filteredKeys };
};
27 | 
28 | export { useFilter, LiveSearch };
29 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Icons.js:
--------------------------------------------------------------------------------
 1 | import { FaBars, FaEye, FaEyeSlash, FaThumbsDown, FaThumbsUp } from "react-icons/fa";
 2 | 
 3 | const withClass =
 4 |   (WrappedComponent, classes) =>
 5 |   ({ className, ...props }) =>
 6 |     <WrappedComponent className={`${classes} ${className || ""}`} {...props} />;
 7 | 
 8 | const ThumbsUp = withClass(FaThumbsUp, "hover:text-green-600");
 9 | const ThumbsDown = withClass(FaThumbsDown, "hover:text-red-600");
10 | const Bars = withClass(FaBars, "hover:text-blue-600");
11 | const EyeOpen = withClass(FaEye, "hover:text-blue-600");
12 | const EyeClosed = withClass(FaEyeSlash, "hover:text-blue-600");
13 | const Eye = ({ show, ...props }) => {
14 |   const Icon = show ? EyeOpen : EyeClosed;
15 |   return <Icon {...props} />;
16 | };
17 | 
18 | export { ThumbsUp, ThumbsDown, Bars, EyeOpen, EyeClosed, Eye };
19 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Layout.js:
--------------------------------------------------------------------------------
 1 | const FlexResponsive = ({ children }) => (
 2 |   <div className="flex gap-4 flex-col md:flex-row lg:flex-col xl:flex-row mb-3">{children}</div>
 3 | );
 4 | 
 5 | const SpaceGap = ({ children, big }) => (
 6 |   <div className={big ? "space-y-4" : "space-y-2"}>{children}</div>
 7 | );
 8 | const FlexGap = ({ children, big }) => (
 9 |   <div className={`flex flex-col ${big ? "gap-4" : "gap-2"}`}>{children}</div>
10 | );
11 | 
12 | export { FlexResponsive, SpaceGap, FlexGap };
13 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Loading.js:
--------------------------------------------------------------------------------
 1 | import React from "react";
 2 | 
 3 | import { Spinner } from "./Spinner";
 4 | 
 5 | const Loading = ({ small, ...props }) => (
 6 |   <div className={small ? "w-[15px]" : "w-[20px]"}>
 7 |     <Spinner {...props} />
 8 |   </div>
 9 | );
10 | 
11 | const CenterLoading = (props) => (
12 |   <div className="flex justify-center">
13 |     <Loading {...props}  />
14 |   </div>
15 | );
16 | 
17 | export { Loading, CenterLoading };
18 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Modal.js:
--------------------------------------------------------------------------------
 1 | import { Dialog } from "@headlessui/react";
 2 | import React, { useState } from "react";
 3 | 
 4 | const ModalTitle = ({ children }) => (
 5 |   <Dialog.Title className="text-3xl font-bold">{children}</Dialog.Title>
 6 | );
 7 | 
 8 | const useModal = () => {
 9 |   const [isOpen, setIsOpen] = useState(false);
10 |   const open = () => setIsOpen(true);
11 |   const close = () => setIsOpen(false);
12 |   return [isOpen, open, close];
13 | };
14 | 
15 | const Modal = ({ children, isOpen, close, fit }) => {
16 |   const fitClass = fit
17 |     ? "top-1/2 left-1/2 transform -translate-x-1/2 -translate-x-1/2"
18 |     : "inset-x-12 inset-y-6";
19 |   return (
20 |     <Dialog open={isOpen} onClose={close}>
21 |       <Dialog.Overlay className="fixed inset-0 bg-black opacity-30 z-20" />
22 |       <div
23 |         className={`${fitClass} fixed bg-white shadow-xl shadow-stone-400 z-50 border overflow-y-auto bg-slate`}
24 |       >
25 |         {children}
26 |       </div>
27 |     </Dialog>
28 |   );
29 | };
30 | 
31 | export { Modal, ModalTitle, useModal };
32 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Toggle.js:
--------------------------------------------------------------------------------
 1 | import { Switch } from "@headlessui/react";
 2 | 
 3 | const Toggle = ({ checked, onChange, onBlur }) => (
 4 |   <Switch
 5 |     checked={checked}
 6 |     onChange={onChange}
 7 |     onBlur={onBlur}
 8 |     className={`${
 9 |       checked ? "bg-blue-600" : "bg-gray-200"
10 |     } relative inline-flex flex-shrink-0 h-[24px] w-[46px] border-2 border-transparent rounded-full cursor-pointer transition-colors ease-in-out duration-200 focus:outline-none focus-visible:ring-2  focus-visible:ring-white focus-visible:ring-opacity-75`}
11 |   >
12 |     <span className="sr-only">Use setting</span>
13 |     <span
14 |       aria-hidden="true"
15 |       className={`${
16 |         checked ? "translate-x-[22px]" : ""
17 |       } pointer-events-none inline-block h-[20px] w-[20px] rounded-full bg-white shadow-lg transform ring-0 transition ease-in-out duration-200`}
18 |     />
19 |   </Switch>
20 | );
21 | 
22 | export { Toggle };
23 | 


--------------------------------------------------------------------------------
/frontend/src/components/utils/Tooltip.js:
--------------------------------------------------------------------------------
 1 | import "react-tooltip/dist/react-tooltip.css";
 2 | 
 3 | import { useId } from "react";
 4 | import { Tooltip as ReactTooltip } from "react-tooltip";
 5 | 
 6 | const Tooltip = ({ place = "left", text, children }) => {
 7 |   const id = useId();
 8 |   return (
 9 |     <div>
10 |       <div id={id} data-tooltip-content={text}>
11 |         {children}
12 |       </div>
13 |       <ReactTooltip
14 |         anchorId={id}
15 |         place={place}
16 |         style={{ backgroundColor: "#656565", maxWidth: "400px", padding: "2px 6px", opacity: 1 }}
17 |       />
18 |     </div>
19 |   );
20 | };
21 | 
22 | export { Tooltip };
23 | 


--------------------------------------------------------------------------------
/frontend/src/config.js:
--------------------------------------------------------------------------------
// Base URL prepended to API paths: the local API server in development,
// an empty prefix (same origin) in every other environment.
const isDevelopment = process.env.NODE_ENV === "development";
const apiBase = isDevelopment ? "http://localhost:5000" : "";
2 | 
3 | export { apiBase };
4 | 


--------------------------------------------------------------------------------
/frontend/src/contexts/DragContext.js:
--------------------------------------------------------------------------------
 1 | import React, { useEffect, useState } from "react";
 2 | 
 3 | const DragContext = React.createContext();
 4 | 
 5 | const DragProvider = ({ children }) => {
 6 |   const [numDragged, setNumDragged] = useState(0);
 7 |   const [dragged, setDragged] = useState(false);
 8 |   const increment = () => setNumDragged((v) => v + 1);
 9 |   const decrement = () => setNumDragged((v) => v - 1);
10 |   useEffect(() => {
11 |     setDragged(numDragged > 0);
12 |   }, [numDragged, setDragged]);
13 | 
14 |   return (
15 |     <DragContext.Provider value={dragged}>
16 |       <div onDragEnter={increment} onDragLeave={decrement} onDrop={decrement}>
17 |         {children}
18 |       </div>
19 |     </DragContext.Provider>
20 |   );
21 | };
22 | 
23 | export { DragContext, DragProvider };
24 | 


--------------------------------------------------------------------------------
/frontend/src/contexts/HoverContext.js:
--------------------------------------------------------------------------------
 1 | import React, { useState } from "react";
 2 | 
 3 | const HoverContext = React.createContext();
 4 | 
 5 | const HoverProvider = ({ children }) => {
 6 |   const [hovered, setHovered] = useState(false);
 7 | 
 8 |   return (
 9 |     <HoverContext.Provider value={hovered}>
10 |       <div onMouseEnter={() => setHovered(true)} onMouseLeave={() => setHovered(false)}>
11 |         {children}
12 |       </div>
13 |     </HoverContext.Provider>
14 |   );
15 | };
16 | 
17 | export { HoverContext, HoverProvider };
18 | 


--------------------------------------------------------------------------------
/frontend/src/contexts/MetricsContext.js:
--------------------------------------------------------------------------------
 1 | import React from "react";
 2 | 
 3 | import { getMetricsRequest } from "../api";
 4 | import { usePlugins } from "../hooks/plugins";
 5 | 
 6 | const defaults = ["metric-null-rouge"];
 7 | 
 8 | const MetricsContext = React.createContext();
 9 | 
10 | const MetricsProvider = ({ children }) => {
11 |   const metrics = usePlugins(getMetricsRequest, defaults);
12 |   return <MetricsContext.Provider value={metrics}>{children}</MetricsContext.Provider>;
13 | };
14 | export { MetricsContext, MetricsProvider };
15 | 


--------------------------------------------------------------------------------
/frontend/src/contexts/SettingsContext.js:
--------------------------------------------------------------------------------
 1 | import React, { useMemo } from "react";
 2 | import { useLocalStorage } from "react-use";
 3 | 
 4 | import { ColorMap } from "../utils/color";
 5 | 
 6 | const SettingsContext = React.createContext();
 7 | 
 8 | const SettingsProvider = ({ children }) => {
 9 |   const [selfSimilarities, setSelfSimilarities] = useLocalStorage("allow-self-similarities", false);
10 |   const [ignoreStopwords, setIgnoreStopwords] = useLocalStorage("ignore-stopwords", true);
11 |   const [minOverlap, setMinOverlap] = useLocalStorage("min-overlap", 3);
12 |   const [summaryLength, setSummaryLength] = useLocalStorage("summary-length", 15);
13 |   const [colorscheme, setColorscheme] = useLocalStorage("colorscheme", "soft");
14 | 
15 |   const colorMap = useMemo(() => new ColorMap(colorscheme, true), [colorscheme]);
16 | 
17 |   const value = useMemo(
18 |     () => ({
19 |       minOverlap,
20 |       setMinOverlap,
21 |       ignoreStopwords,
22 |       setIgnoreStopwords,
23 |       selfSimilarities,
24 |       setSelfSimilarities,
25 |       colorMap,
26 |       setColorscheme,
27 |       summaryLength,
28 |       setSummaryLength,
29 |     }),
30 |     [
31 |       minOverlap,
32 |       ignoreStopwords,
33 |       setIgnoreStopwords,
34 |       setMinOverlap,
35 |       selfSimilarities,
36 |       setSelfSimilarities,
37 |       colorMap,
38 |       setColorscheme,
39 |       summaryLength,
40 |       setSummaryLength,
41 |     ]
42 |   );
43 | 
44 |   return <SettingsContext.Provider value={value}>{children}</SettingsContext.Provider>;
45 | };
46 | export { SettingsContext, SettingsProvider };
47 | 


--------------------------------------------------------------------------------
/frontend/src/contexts/SummarizersContext.js:
--------------------------------------------------------------------------------
 1 | import React from "react";
 2 | 
 3 | import { getSummarizersRequest } from "../api";
 4 | import { usePlugins } from "../hooks/plugins";
 5 | 
 6 | const defaults = ["summarizer-null-textrank", "summarizer-null-bartcnn"];
 7 | 
 8 | const SummarizersContext = React.createContext();
 9 | 
10 | const SummarizersProvider = ({ children }) => {
11 |   const summarizers = usePlugins(getSummarizersRequest, defaults);
12 |   return <SummarizersContext.Provider value={summarizers}>{children}</SummarizersContext.Provider>;
13 | };
14 | export { SummarizersContext, SummarizersProvider };
15 | 


--------------------------------------------------------------------------------
/frontend/src/css/App.css:
--------------------------------------------------------------------------------
 1 | /* body { font-family: monospace; } */
 2 | /*
 3 | General rules
 4 | - Use pt only for setting fonts, px for everything else. See https://www.w3.org/Style/Examples/007/units.en.html
 5 | 
 6 | */
 7 | /* Styling scrollbar */
 8 | 
 9 | body {
10 |   scrollbar-width: 10px !important;
11 |   scrollbar-color: #55a6d1 #9bc4d8;
12 | }
13 | 
14 | /* Scrollbar styles*/
15 | *::-webkit-scrollbar {
16 |   width: 10px;
17 | }
18 | 
19 | *::-webkit-scrollbar-track {
20 |   background: #9bc4d8;
21 | }
22 | 
23 | *::-webkit-scrollbar-thumb {
24 |   background-color: #55a6d1;
25 |   border-radius: 20px;
26 |   border: 3px solid #9bc4d8;
27 | }
28 | 
29 | .summary-border {
30 |   padding: 10px 10px;
31 |   border-radius: 20px;
32 |   border: 2.5px solid #376c47;
33 | }
34 | 
35 | textarea::placeholder {
36 |   line-height: 8em;
37 |   text-align: center;
38 |   font-size: 1.3em;
39 | }
40 | 
41 | .summarizer-toggle-view-enter-done {
42 |   transform: rotate(0);
43 | }
44 | .summarizer-toggle-view-enter {
45 |   transform: rotate(90deg);
46 | }
47 | .summarizer-toggle-view-enter-active {
48 |   transform: rotate(0);
49 |   transition: transform 300ms;
50 | }
51 | .summarizer-toggle-view-exit-done {
52 |   transform: rotate(90deg);
53 | }
54 | .summarizer-toggle-view-exit {
55 |   transform: rotate(0);
56 |   transition: transform 300ms;
57 | }
58 | .summarizer-toggle-view-exit-active {
59 |   transform: rotate(90deg);
60 |   transition: transform 300ms;
61 | }
62 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/abortController.js:
--------------------------------------------------------------------------------
 1 | import { useCallback, useEffect, useState } from "react";
 2 | 
 3 | const useAbortController = () => {
 4 |   const [abortController, setAbortController] = useState(null);
 5 | 
 6 |   useEffect(
 7 |     () => () => {
 8 |       if (abortController) abortController.abort();
 9 |     },
10 |     [abortController]
11 |   );
12 | 
13 |   const reset = useCallback(() => {
14 |     const controller = new AbortController();
15 |     setAbortController(controller);
16 |     return controller;
17 |   }, [setAbortController]);
18 |   const abort = useCallback(() => abortController.abort(), [abortController]);
19 | 
20 |   return { abortController, reset, abort };
21 | };
22 | 
23 | export { useAbortController };
24 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/calculations.js:
--------------------------------------------------------------------------------
 1 | import { useLiveQuery } from "dexie-react-hooks";
 2 | 
 3 | import { initDatabase } from "../utils/saved";
 4 | 
 5 | const calc = initDatabase("calculation", "metrics,scores,hypotheses,references");
 6 | 
 7 | const useCalculations = () => {
 8 |   const calculations = useLiveQuery(calc.getAll);
 9 |   const del = (id) => calc.collection.delete(id);
10 |   return {
11 |     calculations,
12 |     add: calc.add,
13 |     del,
14 |   };
15 | };
16 | 
17 | export { useCalculations };
18 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/list.js:
--------------------------------------------------------------------------------
 1 | import { useCallback, useMemo, useReducer, useRef } from "react";
 2 | 
 3 | const useList = (initialList = []) => {
 4 |   const id = useRef(0);
 5 |   const initialState = useMemo(
 6 |     () => Object.fromEntries(initialList.map((element) => [id.current++, element])),
 7 |     [initialList]
 8 |   );
 9 |   const [elements, elementsReducer] = useReducer((state, action) => {
10 |     let newElements;
11 |     let element;
12 |     let elementID;
13 |     switch (action.type) {
14 |       case "ADD":
15 |         element = action.payload;
16 |         return { ...state, [id.current++]: element };
17 |       case "REMOVE":
18 |         elementID = action.payload;
19 |         newElements = { ...state };
20 |         delete newElements[elementID];
21 |         return newElements;
22 |       case "ALTER":
23 |         [elementID, element] = action.payload;
24 |         return { ...state, [elementID]: element };
25 |       default:
26 |         return state;
27 |     }
28 |   }, initialState);
29 | 
30 |   const addElement = useCallback((element) => elementsReducer({ type: "ADD", payload: element }), [
31 |     elementsReducer,
32 |   ]);
33 |   const removeElement = useCallback(
34 |     (elementID) => {
35 |       elementsReducer({ type: "REMOVE", payload: elementID });
36 |     },
37 |     [elementsReducer]
38 |   );
39 |   const alterElement = useCallback(
40 |     (elementID, element) => {
41 |       elementsReducer({ type: "ALTER", payload: [elementID, element] });
42 |     },
43 |     [elementsReducer]
44 |   );
45 | 
46 |   return [elements, addElement, removeElement, alterElement];
47 | };
48 | 
49 | export { useList };
50 | 


--------------------------------------------------------------------------------
/frontend/src/index.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 | 


--------------------------------------------------------------------------------
/frontend/src/index.js:
--------------------------------------------------------------------------------
 1 | import "./index.css";
 2 | 
 3 | import React from "react";
 4 | import { createRoot } from "react-dom/client";
 5 | 
 6 | import App from "./App";
 7 | 
 8 | const container = document.getElementById("root");
 9 | const root = createRoot(container);
10 | 
11 | root.render(
12 |   // <React.StrictMode>
13 |   <App />
14 |   // </React.StrictMode>
15 | );
16 | 


--------------------------------------------------------------------------------
/frontend/src/request.js:
--------------------------------------------------------------------------------
 1 | import { apiBase } from "./config";
 2 | 
 3 | const request = async (method, path, json, options = {}) => {
 4 |   const { auth, abortController } = options;
 5 |   const args = { method, credentials: "include" };
 6 |   const headers = {};
 7 |   if (json) {
 8 |     args.body = JSON.stringify(json);
 9 |     headers["Content-Type"] = "application/json";
10 |   }
11 |   if (auth) headers.Authorization = `Bearer ${auth}`;
12 |   args.headers = headers;
13 |   if (abortController) args.signal = abortController.signal;
14 |   try {
15 |     const response = await fetch(`${apiBase}${path}`, args);
16 |     return response.json();
17 |   } catch (error) {
18 |     if (abortController && abortController.signal.aborted) return undefined;
19 |     throw error
20 |   }
21 | };
22 | 
// Verb helpers around `request`. GET and DELETE never send a body.
const withoutBody = (method) => async (path, options) => request(method, path, null, options);

const post = async (path, json, options) => request("POST", path, json, options);
const get = withoutBody("GET");
const del = withoutBody("DELETE");
26 | 
// Plain `fetch` against the API base URL; `args` is handed to fetch as is.
const wrappedFetch = (path, args) => {
  const url = `${apiBase}${path}`;
  return fetch(url, args);
};
28 | 
29 | export { post, get, del, wrappedFetch };
30 | 


--------------------------------------------------------------------------------
/frontend/src/utils/export.js:
--------------------------------------------------------------------------------
 1 | import { range } from "./python";
 2 | 
 3 | const tableToStrings = (table, precision) =>
 4 |   table.map((row) => row.map((cell) => (typeof cell === "number" ? cell.toFixed(precision) : " ")));
 5 | 
 6 | const transposeTable = (table) =>
 7 |   range(table[0].length).map((index) => table.map((row) => row[index]));
 8 | 
 9 | const prepareTable = (rownames, colnames, table, transpose, precision) => {
10 |   const stringTable = tableToStrings(table, precision);
11 |   if (transpose) return [colnames, rownames, transposeTable(stringTable)];
12 |   return [rownames, colnames, stringTable];
13 | };
14 | 
// Renders the table as a LaTeX `tabular` with booktabs rules: one left-aligned
// name column followed by right-aligned value columns, row names in bold.
const latex = (rownames, colnames, table, transpose, precision) => {
  const [rnames, cnames, tab] = prepareTable(rownames, colnames, table, transpose, precision);
  const bodyRows = rnames
    .map((name, i) => `\\textbf{${name}} & ${tab[i].join(" & ")} \\\\`)
    .join("\n");
  const lines = [
    `\\begin{tabular}{l${"r".repeat(cnames.length)}}`,
    "\\toprule",
    `{} & ${cnames.join(" & ")} \\\\`,
    "\\midrule",
    bodyRows,
    "\\bottomrule",
    "\\end{tabular}",
  ];
  return lines.join("\n");
};
25 | 
// Renders the table as comma-separated text: a header line starting with an
// empty cell, then one line per row beginning with the row name.
const csv = (rownames, colnames, table, transpose, precision) => {
  const [rnames, cnames, tab] = prepareTable(rownames, colnames, table, transpose, precision);
  const header = `,${cnames.join(",")}`;
  const body = rnames.map((name, i) => `${name},${tab[i].join(",")}`).join("\n");
  return [header, body].join("\n");
};
30 | 
31 | export default { latex, csv };
32 | 


--------------------------------------------------------------------------------
/frontend/src/utils/flatScores.js:
--------------------------------------------------------------------------------
 1 | const flatten = (scores, metrics) => {
 2 |   const flatScores = [];
 3 |   Object.entries(scores).forEach(([metric, value]) => {
 4 |     const { name } = metrics[metric].info
 5 |     if (typeof value === "number") throw new Error (`the metric '${name}' returned a number but list of number is expected`)
 6 |     if (Array.isArray(value)) flatScores.push([name, value]);
 7 |     else
 8 |       Object.entries(value).forEach(([suffix, score]) =>
 9 |         flatScores.push([`${name} ${suffix}`, score])
10 |       );
11 |   });
12 |   flatScores.sort();
13 |   return flatScores;
14 | };
15 | 
16 | export { flatten }
17 | 


--------------------------------------------------------------------------------
/frontend/src/utils/python.js:
--------------------------------------------------------------------------------
// range(n) -> [0, 1, ..., n - 1], like Python's range with one argument.
const range = (count) => [...Array(count).keys()];

// zip(a, b, ...) -> list of tuples, truncated to the shortest input, like
// Python's zip. Called without arguments it returns an empty list.
const zip = (...arrays) => {
  // Math.min() of nothing is Infinity, which Array() rejects with a RangeError.
  if (arrays.length === 0) return [];
  const count = Math.min(...arrays.map(({ length }) => length));
  return range(count).map((i) => arrays.map((array) => array[i]));
};
6 | 
7 | export { zip, range };
8 | 


--------------------------------------------------------------------------------
/frontend/src/utils/readFile.js:
--------------------------------------------------------------------------------
 1 | const readFile = (file, binary=false) => {
 2 |   const reader = new FileReader();
 3 | 
 4 |   return new Promise((resolve, reject) => {
 5 |     reader.onerror = () => {
 6 |       reader.abort();
 7 |       reject(new DOMException("Problem parsing input file."));
 8 |     };
 9 | 
10 |     reader.onload = () => {
11 |       resolve(reader.result);
12 |     };
13 |     if (binary) reader.readAsArrayBuffer(file)
14 |     else reader.readAsText(file);
15 |   });
16 | };
17 | 
18 | export { readFile };
19 | 


--------------------------------------------------------------------------------
/frontend/src/utils/saved.js:
--------------------------------------------------------------------------------
 1 | import Dexie from "dexie";
 2 | 
 3 | const initDatabase = (collectionName, fields) => {
 4 |   const db = new Dexie(`${collectionName}DB`);
 5 |   db.version(1).stores({
 6 |     [collectionName]: `id,${fields},_timestamp`,
 7 |   });
 8 |   const collection = db[collectionName]
 9 |   const add = async (data) => {
10 |     const id = data.id.trim()
11 |     if (!id) throw new Error("NOID")
12 |     const extendedData = { ...data, id, _timestamp: Date.now() };
13 |     try {
14 |       await collection.add(extendedData);
15 |     } catch (err) {
16 |       if (err instanceof Dexie.ConstraintError) throw new Error("TAKEN")
17 |       throw err;
18 |     }
19 |     return true;
20 |   };
21 |   const getAll = () => collection.orderBy("_timestamp").reverse().toArray();
22 |   return { collection, add, getAll };
23 | };
24 | 
25 | export { initDatabase };
26 | 


--------------------------------------------------------------------------------
/frontend/tailwind.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   content: ["./src/**/*.{js,jsx,ts,tsx}"],
3 |   theme: {
4 |     extend: {},
5 |   },
6 |   plugins: [require("@tailwindcss/forms")],
7 | };
8 | 


--------------------------------------------------------------------------------
/grobid/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM lfoppiano/grobid:0.7.1
2 | 
3 | COPY config.yaml /opt/grobid/grobid-service/config/config.yaml
4 | 
5 | 
6 | # vi: ft=dockerfile
7 | 


--------------------------------------------------------------------------------
/grobid/config.yaml:
--------------------------------------------------------------------------------
 1 | grobid:
 2 |   grobidHome: "/opt/grobid/grobid-home"
 3 |   modelPreload: true
 4 | 
 5 | server:
 6 |     type: custom
 7 |     applicationConnectors:
 8 |     - type: http
 9 |       port: 8070
10 |     adminConnectors:
11 |     - type: http
12 |       port: 8071
13 |     registerDefaultExceptionMappers: false
14 | 
15 | 
16 | logging:
17 |   level: WARN
18 |   loggers:
19 |     org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF"
20 |   appenders:
21 |     - type: console
22 |       threshold: ALL
23 |       timeZone: UTC
24 | 


--------------------------------------------------------------------------------
/jsonl_converter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | convert files where every line corresponds to one document/reference/model to jsonl
 4 | first argument: document file
 5 | second argument: reference file
 6 | all following arguments: model files
 7 | """
 8 | 
 9 | import json
10 | from pathlib import Path
11 | from sys import argv
12 | 
files = argv[1:]
if len(files) < 2:
    # SystemExit with a message prints it to stderr and exits with status 1,
    # so the usage error never ends up in the JSONL written to stdout. It also
    # avoids the `exit()` helper, which only exists when the site module is
    # loaded.
    raise SystemExit("provide at least a document and reference file")

# One list of lines per input file, in command-line order.
args = [Path(file).read_text().splitlines() for file in files]
# Every file after the first two is a model output: model1, model2, ...
keys = ["document", "reference"] + [f"model{i+1}" for i in range(len(files) - 2)]
# zip stops at the shortest file, so surplus lines in longer files are dropped.
lines = [dict(zip(keys, e)) for e in zip(*args)]
for line in lines:
    print(json.dumps(line, ensure_ascii=False))
23 | 


--------------------------------------------------------------------------------
/metrics/bartscore/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | numpy = "*"
 8 | torch = "*"
 9 | transformers = "*"
10 | 
11 | [dev-packages]
12 | ipython = "*"
13 | 
14 | [requires]
15 | python_version = "3.10"
16 | 


--------------------------------------------------------------------------------
/metrics/bartscore/metric.py:
--------------------------------------------------------------------------------
 1 | from bart_score import BARTScorer
 2 | 
 3 | DEVICE = "cpu"
 4 | 
 5 | 
 6 | class MetricPlugin:
 7 |     def __init__(self):
 8 |         self.bart = BARTScorer(device=DEVICE, checkpoint="facebook/bart-large-cnn")
 9 | 
10 |     def evaluate(self, batch):
11 |         hypotheses, references = zip(*batch)
12 |         return self.bart.score(hypotheses, references, batch_size=len(batch))
13 | 


--------------------------------------------------------------------------------
/metrics/bartscore/model_setup.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import logging
 3 | 
 4 | from metric import MetricPlugin
 5 | 
 6 | 
 7 | def setup():
 8 |     logger = logging.getLogger(inspect.currentframe().f_code.co_name)
 9 |     logger.setLevel(logging.INFO)
10 |     logger.info("begin")
11 |     MetricPlugin()
12 |     logger.info("done")
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
17 |     DATEFMT = "%H:%M:%S"
18 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, style="{")
19 |     setup()
20 | 


--------------------------------------------------------------------------------
/metrics/bartscore/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: BARTScore
3 | metadata:
4 |   type: semantic
5 |   homepage: https://arxiv.org/abs/2106.11520
6 |   sourcecode: https://github.com/neulab/BARTScore
7 | 


--------------------------------------------------------------------------------
/metrics/bertscore/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | numpy = "*"
10 | bert-score = "*"
11 | 
12 | [requires]
13 | python_version = "3.10"
14 | 


--------------------------------------------------------------------------------
/metrics/bertscore/metric.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from bert_score import BERTScorer
 4 | 
 5 | MODEL = os.environ.get("model") or "microsoft/deberta-xlarge-mnli"
 6 | 
 7 | 
 8 | class MetricPlugin:
 9 |     def __init__(self):
10 |         self.bert = BERTScorer(model_type=MODEL, rescale_with_baseline=True, lang="en")
11 | 
12 |     def evaluate(self, batch):
13 |         hypotheses, references = zip(*batch)
14 |         return self.bert.score(hypotheses, references)[0].tolist()
15 | 
16 |     def metadata(self):
17 |         return {"model": MODEL}
18 | 


--------------------------------------------------------------------------------
/metrics/bertscore/model_setup.py:
--------------------------------------------------------------------------------
 1 | from metric import MetricPlugin
 2 | import logging
 3 | import inspect
 4 | 
 5 | def setup():
 6 |     logger = logging.getLogger(inspect.currentframe().f_code.co_name)
 7 |     logger.setLevel(logging.INFO)
 8 |     logger.info("begin")
 9 |     MetricPlugin()
10 |     logger.info("done")
11 | 
12 | if __name__ == "__main__":
13 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
14 |     DATEFMT = "%H:%M:%S"
15 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, style="{")
16 |     setup()
17 | 


--------------------------------------------------------------------------------
/metrics/bertscore/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: BERTScore
3 | metadata:
4 |   type: semantic
5 |   homepage: https://arxiv.org/pdf/1904.09675.pdf
6 |   sourcecode: https://github.com/Tiiiger/bert_score
7 | 


--------------------------------------------------------------------------------
/metrics/bleu/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | sacrebleu = "*"
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/metrics/bleu/metric/__init__.py:
--------------------------------------------------------------------------------
 1 | from .bleu import Bleu
 2 | 
 3 | 
 4 | class MetricPlugin:
 5 |     def __init__(self):
 6 |         self.bleu = Bleu()
 7 | 
 8 |     def evaluate(self, batch):
 9 |         hypotheses, references = zip(*batch)
10 |         return self.bleu.compute_score(hypotheses, references, aggregate=False)
11 | 


--------------------------------------------------------------------------------
/metrics/bleu/model_setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/metrics/bleu/model_setup.py


--------------------------------------------------------------------------------
/metrics/bleu/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: BLEU
3 | metadata:
4 |   type: lexical
5 |   homepage: https://www.aclweb.org/anthology/P02-1040.pdf
6 |   sourcecode: https://github.com/mjpost/sacreBLEU
7 | 


--------------------------------------------------------------------------------
/metrics/bleurt/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | numpy = "*"
10 | requests = "*"
11 | bleurt = {git = "https://github.com/google-research/bleurt.git"}
12 | 
13 | [requires]
14 | python_version = "3.10"
15 | 


--------------------------------------------------------------------------------
/metrics/bleurt/metric.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | from urllib.parse import urljoin
 4 | 
 5 | import bleurt.score
 6 | 
 7 | 
 8 | class MetricPlugin:
 9 |     MODEL = os.environ.get("model") or "BLEURT-20"
10 |     MODEL_BASE_URL = "https://storage.googleapis.com/bleurt-oss-21/"
11 |     MODEL_PATH = Path("~/.cache/bleurt/").expanduser()
12 | 
13 |     @classmethod
14 |     def MODEL_URL(cls):
15 |         return urljoin(cls.MODEL_BASE_URL + "/", cls.MODEL + ".zip")
16 | 
17 |     def __init__(self):
18 |         self.bleurt = bleurt.score.BleurtScorer(str(self.MODEL_PATH / self.MODEL))
19 | 
20 |     def evaluate(self, batch):
21 |         hypotheses, references = zip(*batch)
22 |         return self.bleurt.score(
23 |             references=references, candidates=hypotheses, batch_size=len(batch)
24 |         )
25 | 
26 |     def metadata(self):
27 |         return {"model": self.MODEL_URL()}
28 | 


--------------------------------------------------------------------------------
/metrics/bleurt/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: BLEURT
3 | metadata:
4 |   type: semantic
5 |   homepage: https://aclanthology.org/2020.acl-main.704/
6 |   sourcecode: https://github.com/google-research/bleurt
7 | 


--------------------------------------------------------------------------------
/metrics/cider/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | numpy = "*"
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/metrics/cider/metric/__init__.py:
--------------------------------------------------------------------------------
 1 | from .cider import Cider
 2 | 
 3 | 
 4 | class MetricPlugin:
 5 |     def __init__(self):
 6 |         self.cider = Cider()
 7 | 
 8 |     def evaluate(self, batch):
 9 |         hypotheses, references = zip(*batch)
10 |         return self.cider.compute_score(hypotheses, references, aggregate=False)
11 | 


--------------------------------------------------------------------------------
/metrics/cider/model_setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/metrics/cider/model_setup.py


--------------------------------------------------------------------------------
/metrics/cider/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: CIDEr
3 | metadata:
4 |   type: lexical
5 |   homepage: https://arxiv.org/pdf/1411.5726.pdf
6 |   sourcecode: https://github.com/Maluuba/nlg-eval
7 | 


--------------------------------------------------------------------------------
/metrics/greedy_matching/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | requests = "*"
10 | nltk = "*"
11 | scikit-learn = "*"
12 | numpy = "*"
13 | gensim = "<4.0.0"
14 | 
15 | [requires]
16 | python_version = "3.9"  # 3.10 and higher do not work
17 | 


--------------------------------------------------------------------------------
/metrics/greedy_matching/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: Greedy Matching
3 | metadata:
4 |   type: lexical
5 |   homepage: https://www.aclweb.org/anthology/W12-2018.pdf
6 |   sourcecode: https://github.com/Maluuba/nlg-eval
7 | 


--------------------------------------------------------------------------------
/metrics/meteor/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | 
10 | [requires]
11 | python_version = "3.10"
12 | 


--------------------------------------------------------------------------------
/metrics/meteor/Pipfile.lock:
--------------------------------------------------------------------------------
 1 | {
 2 |     "_meta": {
 3 |         "hash": {
 4 |             "sha256": "fedbd2ab7afd84cf16f128af0619749267b62277b4cb6989ef16d4bef6e4eef2"
 5 |         },
 6 |         "pipfile-spec": 6,
 7 |         "requires": {
 8 |             "python_version": "3.10"
 9 |         },
10 |         "sources": [
11 |             {
12 |                 "name": "pypi",
13 |                 "url": "https://pypi.org/simple",
14 |                 "verify_ssl": true
15 |             }
16 |         ]
17 |     },
18 |     "default": {},
19 |     "develop": {}
20 | }
21 | 


--------------------------------------------------------------------------------
/metrics/meteor/metric/__init__.py:
--------------------------------------------------------------------------------
 1 | from .meteor import Meteor
 2 | 
 3 | __author__ = "tylin"
 4 | 
 5 | 
 6 | class MetricPlugin:
 7 |     def __init__(self):
 8 |         self.meteor = Meteor()
 9 | 
10 |     def _evaluate(self, hypotheses, references):
11 |         hyp_list_zip = [[hyp] for hyp in hypotheses]
12 |         ref_list_zip = [[ref] for ref in references]
13 | 
14 |         hyps = dict(enumerate(hyp_list_zip))
15 |         refs = dict(enumerate(ref_list_zip))
16 | 
17 |         _, scores = self.meteor.compute_score(refs, hyps)
18 |         return scores
19 | 
20 |     def evaluate(self, batch):
21 |         hypotheses, references = zip(*batch)
22 |         return self._evaluate(hypotheses, references)
23 | 


--------------------------------------------------------------------------------
/metrics/meteor/metric/data/paraphrase-en.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/metrics/meteor/metric/data/paraphrase-en.gz


--------------------------------------------------------------------------------
/metrics/meteor/metric/meteor-1.5.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/metrics/meteor/metric/meteor-1.5.jar


--------------------------------------------------------------------------------
/metrics/meteor/model_setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/metrics/meteor/model_setup.py


--------------------------------------------------------------------------------
/metrics/meteor/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: METEOR
3 | metadata:
4 |   type: lexical
5 |   homepage: https://www.aclweb.org/anthology/W05-0909.pdf
6 |   sourcecode: https://github.com/Maluuba/nlg-eval
7 | 


--------------------------------------------------------------------------------
/metrics/moverscore/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | moverscore = {git = "https://github.com/dominik-schwabe/emnlp19-moverscore.git"}
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/metrics/moverscore/metric.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from moverscore import MoverScore
 4 | 
 5 | MODEL = os.environ.get("model") or "distilbert-base-uncased"
 6 | 
 7 | 
 8 | class MetricPlugin:
 9 |     def __init__(self):
10 |         self.mover_score = MoverScore(model_name=MODEL)
11 | 
12 |     def evaluate(self, batch):
13 |         hypotheses, references = zip(*batch)
14 |         return self.mover_score.score(references, hypotheses)
15 | 
16 |     def metadata(self):
17 |         return {"model": self.mover_score.model_name}
18 | 


--------------------------------------------------------------------------------
/metrics/moverscore/model_setup.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from metric import MetricPlugin
 4 | 
 5 | 
 6 | def setup():
 7 |     MetricPlugin()
 8 | 
 9 | 
10 | if __name__ == "__main__":
11 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
12 |     DATEFMT = "%H:%M:%S"
13 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, style="{")
14 |     setup()
15 | 


--------------------------------------------------------------------------------
/metrics/moverscore/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: MoverScore
3 | metadata:
4 |   type: semantic
5 |   homepage: https://arxiv.org/pdf/1909.02622.pdf
6 |   sourcecode: https://github.com/AIPHES/emnlp19-moverscore
7 | 


--------------------------------------------------------------------------------
/metrics/rouge/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | rouge = "*"
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/metrics/rouge/Pipfile.lock:
--------------------------------------------------------------------------------
 1 | {
 2 |     "_meta": {
 3 |         "hash": {
 4 |             "sha256": "54bb977378e46194505ce335d4f8d397c2606698f7439c78e3aa311a540745f1"
 5 |         },
 6 |         "pipfile-spec": 6,
 7 |         "requires": {
 8 |             "python_version": "3.10"
 9 |         },
10 |         "sources": [
11 |             {
12 |                 "name": "pypi",
13 |                 "url": "https://pypi.org/simple",
14 |                 "verify_ssl": true
15 |             }
16 |         ]
17 |     },
18 |     "default": {
19 |         "rouge": {
20 |             "hashes": [
21 |                 "sha256:12b48346ca47d6bcf3c45061f315452b9ccec0620ee895ec85b7efc3d54aae34",
22 |                 "sha256:28d118536e8c774dc47d1d15ec266479b4dd0914c4672ce117d4002789bdc644"
23 |             ],
24 |             "index": "pypi",
25 |             "version": "==1.0.1"
26 |         },
27 |         "six": {
28 |             "hashes": [
29 |                 "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
30 |                 "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
31 |             ],
32 |             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
33 |             "version": "==1.16.0"
34 |         }
35 |     },
36 |     "develop": {}
37 | }
38 | 


--------------------------------------------------------------------------------
/metrics/rouge/metric.py:
--------------------------------------------------------------------------------
 1 | from os import environ
 2 | 
 3 | from rouge import Rouge
 4 | 
 5 | model = environ["model"]
 6 | 
 7 | AVAILABLE_MODELS = {"1", "2", "l"}
 8 | if model not in AVAILABLE_MODELS:
 9 |     raise ValueError(f"invalid model {model}, needs to be one of {AVAILABLE_MODELS}")
10 | 
11 | 
12 | class MetricPlugin:
13 |     def __init__(self):
14 |         self.rouge = Rouge()
15 |         self.key = f"rouge-{model}"
16 | 
17 |     def evaluate(self, batch):
18 |         hypotheses, references = zip(*batch)
19 |         scores = self.rouge.get_scores(hypotheses, references, avg=False)
20 |         return [score[self.key]["f"] for score in scores]
21 | 
22 |     def metadata(self):
23 |         return {"model": model}
24 | 


--------------------------------------------------------------------------------
/metrics/rouge/model_setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/metrics/rouge/model_setup.py


--------------------------------------------------------------------------------
/metrics/rouge/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "ROUGE-{model}"
3 | metadata:
4 |   type: lexical
5 |   homepage: https://www.aclweb.org/anthology/W04-1013.pdf
6 |   sourcecode: https://github.com/pltrdy/rouge
7 | 


--------------------------------------------------------------------------------
/metrics/sbert/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | sentence_transformers = "*"
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/metrics/sbert/metric.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from sentence_transformers import SentenceTransformer, util
 4 | 
 5 | 
 6 | def _paired_cosine_sim(embeddings1, embeddings2):
 7 |     assert len(embeddings1) == len(embeddings2)
 8 |     return [
 9 |         float(util.pytorch_cos_sim(e1, e2)[0][0])
10 |         for e1, e2 in zip(embeddings1, embeddings2)
11 |     ]
12 | 
13 | 
14 | class MetricPlugin:
15 |     MODEL = os.environ.get("model") or "all-mpnet-base-v2"
16 | 
17 |     def __init__(self):
18 |         self.model = SentenceTransformer(self.MODEL)
19 | 
20 |     def evaluate(self, batch):
21 |         hypotheses, references = zip(*batch)
22 |         embeddings1 = self.model.encode(hypotheses, convert_to_tensor=True)
23 |         embeddings2 = self.model.encode(references, convert_to_tensor=True)
24 |         cosine_scores = _paired_cosine_sim(embeddings1, embeddings2)
25 |         return cosine_scores
26 | 
27 |     def metadata(self):
28 |         return {"model": self.MODEL}
29 | 


--------------------------------------------------------------------------------
/metrics/sbert/model_setup.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import logging
 3 | 
 4 | from metric import MetricPlugin
 5 | 
 6 | 
 7 | def setup():
 8 |     logger = logging.getLogger(inspect.currentframe().f_code.co_name)
 9 |     logger.setLevel(logging.INFO)
10 |     logger.info("begin")
11 |     MetricPlugin()
12 |     logger.info("done")
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
17 |     DATEFMT = "%H:%M:%S"
18 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, style="{")
19 |     setup()
20 | 


--------------------------------------------------------------------------------
/metrics/sbert/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: SBERT
3 | metadata:
4 |   type: semantic
5 |   homepage: https://aclanthology.org/D19-1410/
6 |   sourcecode: https://github.com/UKPLab/sentence-transformers
7 | 


--------------------------------------------------------------------------------
/metrics/spacy_similarity/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | spacy = "*"
 8 | 
 9 | [dev-packages]
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/metrics/spacy_similarity/metric.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import spacy
 4 | 
 5 | MODEL = os.environ.get("model") or "en_core_web_lg"
 6 | 
 7 | 
 8 | class MetricPlugin:
 9 |     def __init__(self):
10 |         self.nlp = spacy.load(MODEL)
11 | 
12 |     def _evaluate(self, hypotheses, references):
13 |         nlp = self.nlp
14 |         scores = [nlp(h).similarity(nlp(r)) for h, r in zip(hypotheses, references)]
15 |         return scores
16 | 
17 |     def evaluate(self, batch):
18 |         hypotheses, references = zip(*batch)
19 |         return self._evaluate(hypotheses, references)
20 | 
21 |     def metadata(self):
22 |         return {"model": MODEL}
23 | 


--------------------------------------------------------------------------------
/metrics/spacy_similarity/model_setup.py:
--------------------------------------------------------------------------------
1 | import spacy
2 | from metric import MODEL
3 | 
4 | if not spacy.util.is_package(MODEL):
5 |     spacy.cli.download(MODEL)
6 | 


--------------------------------------------------------------------------------
/metrics/spacy_similarity/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: Spacy Similarity
3 | metadata:
4 |   type: semantic
5 |   sourcecode: https://github.com/explosion/spaCy
6 | 


--------------------------------------------------------------------------------
/other/static/BLEURT_BERTScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/BLEURT_BERTScore.png


--------------------------------------------------------------------------------
/other/static/MoverScore_BERTScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/MoverScore_BERTScore.png


--------------------------------------------------------------------------------
/other/static/SBERT_BERTScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/SBERT_BERTScore.png


--------------------------------------------------------------------------------
/other/static/T5-11B_T5-3B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/T5-11B_T5-3B.png


--------------------------------------------------------------------------------
/other/static/example_low-BARTScore_medium-BERTScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/example_low-BARTScore_medium-BERTScore.png


--------------------------------------------------------------------------------
/other/static/example_medium-BARTScore_low-BERTScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/example_medium-BARTScore_low-BERTScore.png


--------------------------------------------------------------------------------
/other/static/select_BLEURT_BARTScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/select_BLEURT_BARTScore.png


--------------------------------------------------------------------------------
/other/static/select_BLEU_BERTScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/select_BLEU_BERTScore.png


--------------------------------------------------------------------------------
/other/static/semantic_lexical_variance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/semantic_lexical_variance.png


--------------------------------------------------------------------------------
/other/static/spacy_similarity_BERTScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/other/static/spacy_similarity_BERTScore.png


--------------------------------------------------------------------------------
/plugin_config/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/plugin_config/.gitkeep


--------------------------------------------------------------------------------
/plugin_server/dev.boot.sh:
--------------------------------------------------------------------------------
 1 | # Development boot script: (re)creates the virtualenv when needed, installs
 2 | # the plugin server and plugin dependencies, runs the plugin's model setup and
 3 | # starts the server with auto-reload. Uses bash-only syntax ([[ ]], source).
 4 | export VIRTUAL_ENV=/root/.venv
 5 | 
 6 | # the venv is reused only when the version recorded in pyvenv.cfg equals the
 7 | # version printed by `python --version`
 8 | VENV_EXISTS=false
 9 | if [[ -r "$VIRTUAL_ENV/pyvenv.cfg" ]]; then
10 |   VENV_PYTHON_VERSION=$(sed -n '/^version/ s/[^0-9.]//g p' "$VIRTUAL_ENV/pyvenv.cfg")
11 |   GLOBAL_PYTHON_VERSION=$(python --version | sed -n 's/[^0-9.]//g p')
12 |   # quoted right-hand side: compared literally instead of as a glob pattern
13 |   if [[ "$VENV_PYTHON_VERSION" == "$GLOBAL_PYTHON_VERSION" ]]; then
14 |     VENV_EXISTS=true
15 |   fi
16 | fi
17 | 
18 | if [[ "$VENV_EXISTS" == "false" ]]; then
19 |   echo "no valid virtualenv found, creating ..."
20 |   rm -rf "$VIRTUAL_ENV"
21 |   python -m venv "$VIRTUAL_ENV" || exit 1
22 | fi
23 | 
24 | source "$VIRTUAL_ENV/bin/activate" || exit 1
25 | 
26 | # dependencies of the plugin server itself
27 | cd /summary_workbench_plugin_server || exit 1
28 | pip install -r /summary_workbench_plugin_server/requirements.txt || exit 1
29 | 
30 | # dependencies of the plugin: a Pipfile(.lock) takes precedence over
31 | # requirements.txt; having neither is an error
32 | cd /summary_workbench_plugin_files || exit 1
33 | if [[ -f Pipfile || -f Pipfile.lock ]]; then
34 |   pip install pipenv || exit 1
35 |   pipenv install || exit 1
36 | elif [[ -f requirements.txt ]]; then
37 |   pip install -r requirements.txt || exit 1
38 | else
39 |   echo "no pipfile.lock, pipfile or requirements.txt was provided"
40 |   exit 1
41 | fi
42 | python model_setup.py || exit 1
43 | uvicorn app:app --app-dir /summary_workbench_plugin_server --host 0.0.0.0 --port 5000 --reload --reload-dir /summary_workbench_plugin_files --reload-dir /summary_workbench_plugin_server
44 | 


--------------------------------------------------------------------------------
/plugin_server/errors.py:
--------------------------------------------------------------------------------
1 | def validation_exception(exc):
2 |     return {"success": False, "error": "VALIDATION", "errors": exc.errors()}
3 | 
4 | def general_exception(exc):
5 |     return {"success": False, "error": "APPLICATION", "message": str(exc)}
6 | 


--------------------------------------------------------------------------------
/plugin_server/manager/request.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | from utils.aio import parallel
 4 | from utils.event import EventBox
 5 | 
 6 | 
 7 | class RequestManager:
 8 |     def __init__(self, request, check_timeout=1):
 9 |         self.request = request
10 |         self.check_timeout = check_timeout
11 |         self.disconnect_event = asyncio.Event()
12 | 
13 |     async def check_disconnected(self):
14 |         while not await self.request.is_disconnected():
15 |             await asyncio.sleep(self.check_timeout)
16 |         self.disconnect_event.set()
17 | 
18 |     async def send_to_workers(self, data, workers, response):
19 |         async with parallel(self.check_disconnected()):
20 |             event_box = EventBox(self.disconnect_event)
21 |             workers.submit(event_box, data)
22 |             await event_box.wait()
23 |             return event_box.make_response(response)
24 | 


--------------------------------------------------------------------------------
/plugin_server/metric_factory.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple
 2 | 
 3 | from argument_models import create_function_validator
 4 | from metric import MetricPlugin
 5 | 
 6 | 
 7 | class MetricFactory:
 8 |     def __init__(self):
 9 |         self.plugin = MetricPlugin()
10 |         self.func = self.plugin.evaluate
11 |         (
12 |             self.batch_validator,
13 |             self.required_validator,
14 |             self.argument_validator,
15 |             self.full_validator,
16 |         ) = create_function_validator(self.func, [("batch", List[Tuple[str, str]])])
17 |         try:
18 |             self.metadata = self.plugin.metadata()
19 |         except AttributeError:
20 |             self.metadata = {}
21 | 


--------------------------------------------------------------------------------
/plugin_server/requirements.txt:
--------------------------------------------------------------------------------
1 | pydantic
2 | fastapi
3 | uvicorn[standard]
4 | kthread
5 | cachetools
6 | 


--------------------------------------------------------------------------------
/plugin_server/summarizer_factory.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple
 2 | 
 3 | from argument_models import create_function_validator
 4 | from pydantic import Field, root_validator
 5 | from summarizer import SummarizerPlugin
 6 | 
 7 | 
 8 | class SummarizerFactory:
 9 |     def __init__(self):
10 |         self.plugin = SummarizerPlugin()
11 |         self.func = self.plugin.summarize
12 |         (
13 |             self.batch_validator,
14 |             self.required_validator,
15 |             self.argument_validator,
16 |             self.full_validator,
17 |         ) = create_function_validator(
18 |             self.func,
19 |             [("batch", List[str])],
20 |             {
21 |                 "ratio": (
22 |                     float,
23 |                     Field(
24 |                         0.2,
25 |                         gt=0,
26 |                         lt=1,
27 |                         description="The ratio must be in the closed interval (0,1)",
28 |                     ),
29 |                 )
30 |             },
31 |         )
32 |         try:
33 |             self.metadata = self.plugin.metadata()
34 |         except AttributeError:
35 |             self.metadata = {}
36 | 


--------------------------------------------------------------------------------
/plugin_server/utils/thread.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | import kthread
 4 | 
 5 | from utils.aio import to_thread, wait_first
 6 | 
 7 | 
 8 | class CancableThread(kthread.KThread):  # KThread whose outcome can be awaited from a coroutine
 9 |     def run(self):  # thread body: call the target and record its result or exception
10 |         self.result = None
11 |         self.exc = None
12 |         try:
13 |             self.result = self._target(*self._args, **self._kwargs)
14 |         except Exception as e:
15 |             self.exc = e  # kept so _run_async can re-raise it to the awaiting caller
16 | 
17 |     async def _run_async(self):  # start the thread, wait for it, then return its result or re-raise
18 |         self.start()
19 |         try:
20 |             await to_thread(self.join)  # join is awaited through to_thread rather than called inline
21 |             if self.exc:
22 |                 raise self.exc
23 |             return self.result
24 |         except asyncio.CancelledError:
25 |             if self.is_alive():
26 |                 self.terminate()  # presumably kthread's forced stop of the thread -- confirm
27 |             raise
28 | 
29 |     async def run_until_finish_or_event(self, event):  # returns the first item of wait_first's result
30 |         result, _ = await wait_first([self._run_async(), event.wait()])  # NOTE(review): wait_first presumably cancels the loser -- confirm in utils.aio
31 |         return result
32 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | GitPython
2 | click
3 | docker
4 | giturlparse
5 | pydantic<2
6 | requests
7 | ruamel.yaml
8 | termcolor
9 | 


--------------------------------------------------------------------------------
/schema/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
2 | 


--------------------------------------------------------------------------------
/summarizers/aosumm/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | transformers = "*"
10 | torch = "*"
11 | sentencepiece = "*"
12 | protobuf = "==3.20.*"
13 | 
14 | [requires]
15 | python_version = "3.10"
16 | 


--------------------------------------------------------------------------------
/summarizers/aosumm/model_setup.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import logging
 3 | 
 4 | from summarizer import MODEL, SummarizerPlugin
 5 | 
 6 | 
 7 | def setup():
 8 |     logger = logging.getLogger(inspect.currentframe().f_code.co_name)
 9 | 
10 |     logger.info("downloading %s", MODEL)
11 |     SummarizerPlugin()
12 |     logger.info("done")
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
17 |     DATEFMT = "%H:%M:%S"
18 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, style="{", level="INFO")
19 |     setup()
20 | 


--------------------------------------------------------------------------------
/summarizers/aosumm/summarizer/__init__.py:
--------------------------------------------------------------------------------
 1 | from pydantic import Field
 2 | 
 3 | from .summarizer import Generator
 4 | 
 5 | MODEL = "hyunwoongko/ctrlsum-cnndm"
 6 | 
 7 | 
 8 | class SummarizerPlugin:
 9 |     def __init__(self):
10 |         self.model = Generator(MODEL)
11 |         self.meta = {"model": self.model.model_name}
12 | 
13 |     def summarize(
14 |         self,
15 |         batch,
16 |         ratio,
17 |         keywords: str = Field(..., min_length=1),
18 |         use_contrastive_search: bool = True,
19 |     ):
20 |         return [
21 |             self.model.summarize(
22 |                 text,
23 |                 keywords,
24 |                 ratio=ratio,
25 |                 use_contrastive_search=use_contrastive_search,
26 |             )
27 |             for text in batch
28 |         ]
29 | 
30 |     def metadata(self):
31 |         return self.meta
32 | 


--------------------------------------------------------------------------------
/summarizers/aosumm/summarizer/summarizer.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 4 | 
 5 | from .transformer_summarizer import TransformerSummarizer
 6 | 
 7 | word_re = re.compile(r"[^\s]+")
 8 | 
 9 | 
10 | class Generator:
11 |     def __init__(self, model_name):
12 |         self.model_name = model_name
13 |         self.tokenizer = AutoTokenizer.from_pretrained(
14 |             self.model_name, tokenizer_file=None
15 |         )
16 |         self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
17 |         self.model.eval()
18 |         self.chunker = TransformerSummarizer(
19 |             generator=self.model,
20 |             tokenizer=self.tokenizer,
21 |             default_arguments={"do_sample": True, "repetition_penalty": 1.2},
22 |         )
23 | 
24 |     def summarize(
25 |         self,
26 |         text: str,
27 |         keywords: str,
28 |         *,
29 |         use_contrastive_search: bool,
30 |         ratio: float = 0.2
31 |     ):
32 |         keywords = word_re.findall(keywords)
33 |         prompt = " | ".join(keywords) + " - "
34 |         return self.chunker(
35 |             text,
36 |             use_contrastive_search=use_contrastive_search,
37 |             prompt=prompt,
38 |             ratio=ratio,
39 |         )
40 | 


--------------------------------------------------------------------------------
/summarizers/aosumm/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "AOSUMM"
3 | metadata:
4 |   type: abstractive
5 |   sourcecode: https://github.com/oja/aosumm
6 | 


--------------------------------------------------------------------------------
/summarizers/argpagerank/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | absl-py = "*"
 8 | numpy = "*"
 9 | discretemarkovchain = "*"
10 | nltk = "*"
11 | scikit-learn = "*"
12 | tensorflow = "*"
13 | tensorflow-hub = "*"
14 | 
15 | [dev-packages]
16 | 
17 | [requires]
18 | python_version = "3.10"
19 | 


--------------------------------------------------------------------------------
/summarizers/argpagerank/model_setup.py:
--------------------------------------------------------------------------------
 1 | from urllib import response
 2 | import requests
 3 | from pathlib import Path
 4 | import os
 5 | import nltk
 6 | 
 7 | DATA_URL = "https://files.webis.de/summarization-models/arg-pagerank/data/claim_lexicon.txt"
 8 | SAVE_DIR = Path("~/data").expanduser()
 9 | 
10 | def setup():
11 |     Path(SAVE_DIR).mkdir(parents=True, exist_ok=True)
12 |     response = requests.get(DATA_URL, stream=False)
13 |     with open(SAVE_DIR / "claim_lexicon.txt", 'wb') as outf:
14 |         outf.write(response.content)
15 |     print("Downloaded claim lexicon file")
16 |     nltk.download('punkt')
17 | 
18 | if __name__ == "__main__":
19 |     setup()
20 | 


--------------------------------------------------------------------------------
/summarizers/argpagerank/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "ArgsRank"
3 | metadata:
4 |   type: extractive
5 |   homepage: https://dl.acm.org/doi/10.1145/3397271.3401186
6 |   sourcecode: https://github.com/webis-de/sigir20-extractive-snippet-generation-for-arguments
7 | 


--------------------------------------------------------------------------------
/summarizers/bertsum/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | ipython = "*"
 8 | 
 9 | [packages]
10 | torch = "*"
11 | sentence_transformers = "*"
12 | scikit-learn = "*"
13 | spacy = "*"
14 | 
15 | [requires]
16 | python_version = "3.10"
17 | 


--------------------------------------------------------------------------------
/summarizers/bertsum/model_setup.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import logging
 3 | 
 4 | from summarizer import SummarizerPlugin
 5 | 
 6 | 
 7 | def setup():
 8 |     SummarizerPlugin()
 9 | 
10 | 
11 | if __name__ == "__main__":
12 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
13 |     DATEFMT = "%H:%M:%S"
14 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO, style="{")
15 |     setup()
16 | 


--------------------------------------------------------------------------------
/summarizers/bertsum/summarizer/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .ModelProcessor import BertSummarizer
 4 | 
 5 | MODEL = os.environ.get("model") or "distilbert-base-uncased"
 6 | 
 7 | 
 8 | class SummarizerPlugin:
 9 |     def __init__(self):
10 |         self.bertsum = BertSummarizer(model=MODEL, reduce_option="max")
11 | 
12 |     def summarize(self, batch, ratio):
13 |         return [
14 |             self.bertsum(text, min_length=0, max_length=500, ratio=ratio)
15 |             for text in batch
16 |         ]
17 | 


--------------------------------------------------------------------------------
/summarizers/bertsum/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: BERTSummarizer
3 | metadata:
4 |   type: extractive
5 |   homepage: https://arxiv.org/abs/1906.04165
6 | 


--------------------------------------------------------------------------------
/summarizers/biasedtextrank/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | pytextrank = "*"
 8 | 
 9 | [dev-packages]
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/summarizers/biasedtextrank/model_setup.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import os
 3 | 
 4 | def setup():
 5 |     process = subprocess.run(['python', '-m', 'spacy', 'download', 'en_core_web_sm'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 6 | 
 7 | if __name__ == "__main__":
 8 |     setup()
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/summarizers/biasedtextrank/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "Biased TextRank"
3 | metadata:
4 |   type: extractive
5 |   sourcecode: "https://github.com/DerwenAI/pytextrank"
6 | 


--------------------------------------------------------------------------------
/summarizers/brio/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | transformers = "*"
10 | torch = "*"
11 | sentencepiece = "*"
12 | protobuf = "==3.20.*"
13 | 
14 | [requires]
15 | python_version = "3.10"
16 | 


--------------------------------------------------------------------------------
/summarizers/brio/model_setup.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import logging
 3 | 
 4 | from summarizer import SummarizerPlugin
 5 | 
 6 | 
 7 | def setup():
 8 |     logger = logging.getLogger(inspect.currentframe().f_code.co_name)
 9 | 
10 |     logger.info("downloading model")
11 |     SummarizerPlugin()
12 |     logger.info("done")
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
17 |     DATEFMT = "%H:%M:%S"
18 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, style="{", level="INFO")
19 |     setup()
20 | 


--------------------------------------------------------------------------------
/summarizers/brio/summarizer/__init__.py:
--------------------------------------------------------------------------------
 1 | from os import environ
 2 | 
 3 | from .summarizer import Generator
 4 | 
 5 | MODELS = {
 6 |     "CNNDM-uncased": "Yale-LILY/brio-cnndm-uncased",
 7 |     "CNNDM-cased": "Yale-LILY/brio-cnndm-cased",
 8 |     "XSUM-cased": "Yale-LILY/brio-xsum-cased",
 9 | }
10 | 
11 | 
12 | class SummarizerPlugin:
13 |     def __init__(self, model=None):
14 |         model = model or environ["model"]
15 |         self.model = Generator(MODELS[model])
16 |         self.meta = {"model": self.model.model_name}
17 | 
18 |     def summarize(self, batch, ratio):
19 |         return [self.model.summarize(text, ratio) for text in batch]
20 | 
21 |     def summarize(
22 |         self,
23 |         batch,
24 |         ratio,
25 |         use_contrastive_search: bool = True,
26 |     ):
27 |         return [
28 |             self.model.summarize(
29 |                 text,
30 |                 ratio=ratio,
31 |                 use_contrastive_search=use_contrastive_search,
32 |             )
33 |             for text in batch
34 |         ]
35 | 
36 |     def metadata(self):
37 |         return self.meta
38 | 


--------------------------------------------------------------------------------
/summarizers/brio/summarizer/summarizer.py:
--------------------------------------------------------------------------------
 1 | from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 2 | 
 3 | from .transformer_summarizer import TransformerSummarizer
 4 | 
 5 | 
 6 | class Generator:
 7 |     def __init__(self, model_name):
 8 |         self.model_name = model_name
 9 |         self.tokenizer = AutoTokenizer.from_pretrained(
10 |             self.model_name, tokenizer_file=None
11 |         )
12 |         self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
13 |         self.model.eval()
14 |         self.chunker = TransformerSummarizer(
15 |             generator=self.model,
16 |             tokenizer=self.tokenizer,
17 |             default_arguments={"do_sample": True, "repetition_penalty": 1.2},
18 |         )
19 | 
20 |     def summarize(self, text: str, *, use_contrastive_search: bool, ratio: float = 0.2):
21 |         return self.chunker(
22 |             text,
23 |             use_contrastive_search=use_contrastive_search,
24 |             ratio=ratio,
25 |         )
26 | 


--------------------------------------------------------------------------------
/summarizers/brio/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "BRIO ({model})"
3 | metadata:
4 |   type: abstractive
5 |   sourcecode: https://github.com/yixinL7/BRIO
6 |   homepage: https://arxiv.org/abs/2203.16804
7 | 


--------------------------------------------------------------------------------
/summarizers/cliffsum/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | torch = "*"
 8 | transformers = "*"
 9 | sentencepiece = "*"
10 | protobuf = "==3.20.*"
11 | nltk = "*"
12 | 
13 | [dev-packages]
14 | 
15 | [requires]
16 | python_version = "3.10"
17 | 


--------------------------------------------------------------------------------
/summarizers/cliffsum/model_setup.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from summarizer import SummarizerPlugin
 4 | 
 5 | 
 6 | def setup():
 7 |     logging.info("Downloading checkpoints")
 8 |     SummarizerPlugin()
 9 |     logging.info("Done")
10 | 
11 | 
12 | if __name__ == "__main__":
13 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
14 |     DATEFMT = "%H:%M:%S"
15 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO, style="{")
16 |     setup()
17 | 


--------------------------------------------------------------------------------
/summarizers/cliffsum/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "CLIFF-{model}"
3 | metadata:
4 |   type: abstractive
5 |   homepage: "https://aclanthology.org/2021.emnlp-main.532/"
6 |   sourcecode: "https://github.com/ShuyangCao/cliff_summ"
7 | 


--------------------------------------------------------------------------------
/summarizers/conclugen/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | requests = "*"
 8 | torch = "*"
 9 | transformers = "*"
10 | 
11 | [dev-packages]
12 | 
13 | [requires]
14 | python_version = "3.10"
15 | 


--------------------------------------------------------------------------------
/summarizers/conclugen/model_setup.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from summarizer import SummarizerPlugin
 4 | 
 5 | 
 6 | def setup():
 7 |     logging.info("Downloading checkpoints")
 8 |     SummarizerPlugin()
 9 |     logging.info("Done")
10 | 
11 | 
12 | if __name__ == "__main__":
13 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
14 |     DATEFMT = "%H:%M:%S"
15 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.INFO, style="{")
16 |     setup()
17 | 


--------------------------------------------------------------------------------
/summarizers/conclugen/summarizer/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | 
 4 | from .summarizer import ConcluGen
 5 | 
 6 | SAVE_DIR = Path("~/checkpoints").expanduser()
 7 | 
 8 | 
 9 | class SummarizerPlugin:
10 |     def __init__(self):
11 |         url = "https://files.webis.de/webis-conclugen21-models/dbart.tar.gz"
12 |         path = SAVE_DIR / "dbart"
13 |         self.meta = {"model": url}
14 |         self.summarizer = ConcluGen(path, url)
15 | 
16 |     def summarize(self, batch, ratio, use_contrastive_search: bool = True):
17 |         return [
18 |             self.summarizer.summarize(
19 |                 text, ratio=ratio, use_contrastive_search=use_contrastive_search
20 |             )
21 |             for text in batch
22 |         ]
23 | 
24 |     def metadata(self):
25 |         return self.meta
26 | 


--------------------------------------------------------------------------------
/summarizers/conclugen/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "ConcluGen"
3 | metadata:
4 |   type: abstractive
5 |   homepage: https://aclanthology.org/2021.findings-acl.306/
6 |   sourcecode: https://github.com/webis-de/acl21-informative-conclusion-generation
7 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | torch = "*"
 8 | transformers = "*"
 9 | sentencepiece = "==0.1.96"
10 | py-rouge = "*"
11 | tqdm = "*"
12 | pandas = "*"
13 | allennlp = "*"
14 | requests = "*"
15 | nltk = "*"
16 | huggingface-hub = "*"
17 | 
18 | [dev-packages]
19 | 
20 | [requires]
21 | python_version = "3.10"
22 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/model_setup.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import logging
 3 | from summarizer import MODEL
 4 | import nltk
 5 | from summarizer import SummarizerPlugin
 6 | 
 7 | 
 8 | def setup():
 9 |     logger = logging.getLogger(inspect.currentframe().f_code.co_name)
10 |     logger.info("Initializing %s", MODEL)
11 |     nltk.download("punkt")
12 |     SummarizerPlugin()
13 |     logger.info("Done initializing %s", MODEL)
14 | 
15 | 
16 | 
17 | if __name__ == "__main__":
18 |     FORMAT = "{asctime} {levelname} [{name}] {message}"
19 |     DATEFMT = "%H:%M:%S"
20 |     logging.basicConfig(format=FORMAT, datefmt=DATEFMT, style="{")
21 |     setup()
22 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/config/bimeanvae/amzn.jsonnet:
--------------------------------------------------------------------------------
 1 | local lib = import '../utils.libsonnet';
 2 | local data_type = "amzn";
 3 | local latent_dim = 512;
 4 | local free_bit = 0.25;
 5 | local num_steps = 100000;
 6 | local checkout_step = 1000;
 7 | local batch_size = 256;
 8 | local lr = 1e-3;
 9 | 
10 | {
11 |     "data_dir": "./data/%s" % data_type,
12 |     "spm_path": "./data/sentencepiece/%s.model" % data_type,
13 |     "model": lib.BiMeanVAE(latent_dim, free_bit),
14 |     "trainer": lib.VAETrainer(num_steps, checkout_step, batch_size, lr)
15 | }
16 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/config/bimeanvae/yelp.jsonnet:
--------------------------------------------------------------------------------
 1 | local lib = import '../utils.libsonnet';
 2 | local data_type = "yelp";
 3 | local latent_dim = 512;
 4 | local free_bit = 0.25;
 5 | local num_steps = 100000;
 6 | local checkout_step = 1000;
 7 | local batch_size = 256;
 8 | local lr = 1e-3;
 9 | 
10 | {
11 |     "data_dir": "./data/%s" % data_type,
12 |     "spm_path": "./data/sentencepiece/%s.model" % data_type,
13 |     "model": lib.BiMeanVAE(latent_dim, free_bit),
14 |     "trainer": lib.VAETrainer(num_steps, checkout_step, batch_size, lr)
15 | }
16 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/config/optimus/amzn.jsonnet:
--------------------------------------------------------------------------------
 1 | local lib = import '../utils.libsonnet';
 2 | local data_type = "amzn";
 3 | local latent_dim = 512;
 4 | local free_bit = 2.0;
 5 | local num_steps = 500000;
 6 | local checkout_step = 20000;
 7 | local batch_size = 4;
 8 | local lr = 1e-5;
 9 | 
10 | {
11 |     "data_dir": "./data/%s" % data_type,
12 |     "model": lib.Optimus(latent_dim, free_bit),
13 |     "trainer": lib.VAETrainer(num_steps, checkout_step, batch_size, lr)
14 | }
15 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/config/optimus/yelp.jsonnet:
--------------------------------------------------------------------------------
 1 | local lib = import '../utils.libsonnet';
 2 | local data_type = "yelp";
 3 | local latent_dim = 512;
 4 | local free_bit = 2.0;
 5 | local num_steps = 500000;
 6 | local checkout_step = 20000;
 7 | local batch_size = 4;
 8 | local lr = 1e-5;
 9 | 
10 | {
11 |     "data_dir": "./data/%s" % data_type,
12 |     "model": lib.Optimus(latent_dim, free_bit),
13 |     "trainer": lib.VAETrainer(num_steps, checkout_step, batch_size, lr)
14 | }
15 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/config/utils.libsonnet:
--------------------------------------------------------------------------------
 1 | {
 2 |     BiMeanVAE(latent_dim, free_bit)::
 3 |         local embedding_dim = 256;
 4 |         local hidden_size = 512;
 5 |         local num_layers = 1;
 6 |         {
 7 |             "type": "bimeanvae",
 8 |             "embedding_dim": embedding_dim,
 9 |             "hidden_size": hidden_size,
10 |             "latent_dim": latent_dim,
11 |             "num_layers": num_layers,
12 |             "free_bit": free_bit
13 |         },
14 | 
15 |     Optimus(latent_dim, free_bit)::
16 |         {
17 |             "type": "optimus",
18 |             "latent_dim": latent_dim,
19 |             "free_bit": free_bit,
20 |         },
21 | 
22 |     VAETrainer(num_steps, checkout_step, batch_size, lr)::
23 |         {
24 |             "num_steps": num_steps,
25 |             "checkout_step": checkout_step,
26 |             "batch_size": batch_size,
27 |             "lr": lr
28 |         }
29 | }


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/coop/__init__.py:
--------------------------------------------------------------------------------
1 | from . import util
2 | from .vae import VAE
3 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/coop/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Model
2 | from .bimeanvae import BiMeanVAE
3 | from .optimus import Optimus
4 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/coop/models/base.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class Model(nn.Module):
 6 | 
 7 |     def __init__(self,
 8 |                  hidden_size: int,
 9 |                  latent_dim: int):
10 |         super().__init__()
11 |         self.hidden_size = hidden_size
12 |         self.latent_dim = latent_dim
13 | 
14 |     def forward(self,
15 |                 src: torch.Tensor,
16 |                 tgt: torch.Tensor = None,
17 |                 do_generate: torch.Tensor = False,
18 |                 **kwargs):
19 |         raise NotImplementedError()
20 | 
21 |     @torch.no_grad()
22 |     def generate(self,
23 |                  z: torch.Tensor,
24 |                  num_beams: int = 4,
25 |                  max_tokens: int = 256):
26 |         raise NotImplementedError()
27 | 
28 |     @staticmethod
29 |     def klw(step: int,
30 |             interval: int,
31 |             r: float = 0.8,
32 |             t: float = 0.0,
33 |             s: int = 10000):
34 |         raise NotImplementedError()
35 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/summarizer/coop/coop/models/util.py:
--------------------------------------------------------------------------------
 1 | from typing import NamedTuple
 2 | 
 3 | import torch
 4 | from torch.distributions import Normal
 5 | 
 6 | 
 7 | class Losses(NamedTuple):  # immutable bundle of three loss tensors
 8 |     nll: torch.Tensor  # presumably the negative log-likelihood term -- confirm at call sites
 9 |     zkl: torch.Tensor  # presumably a KL term on the latent z -- confirm at call sites
10 |     zkl_real: torch.Tensor  # NOTE(review): how this differs from zkl is not visible here
11 | 
12 | 
13 | class VAEOut(NamedTuple):  # immutable bundle of a Normal distribution and an optional tensor
14 |     q: Normal  # a torch.distributions.Normal instance
15 |     generated: torch.Tensor = None  # defaults to None; presumably set only when generation ran -- confirm
16 | 


--------------------------------------------------------------------------------
/summarizers/coopsum/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "{model}"
3 | metadata:
4 |   type: abstractive
5 |   homepage: "https://aclanthology.org/2021.findings-emnlp.328/"
6 |   sourcecode: "https://github.com/megagonlabs/coop"
7 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | scipy = "*"
 8 | numpy = "*"
 9 | scikit-learn = "*"
10 | spacy = "*"
11 | pandas = "*"
12 | 
13 | [dev-packages]
14 | 
15 | [requires]
16 | python_version = "3.10"
17 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/model_setup.py:
--------------------------------------------------------------------------------
 1 | from summarizer import SummarizerPlugin
 2 | 
 3 | 
 4 | def setup():  # instantiate the plugin once; the instance itself is discarded
 5 |     SummarizerPlugin()
 6 | 
 7 | 
 8 | if __name__ == "__main__":  # run setup() when executed as a script
 9 |     setup()
10 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .scores import Scorer
 4 | 
 5 | 
 6 | class SummarizerPlugin:
 7 |     def __init__(self, model=None):
 8 |         self.model_name = model or os.environ.get("model") or "en_core_web_lg"
 9 |         self.model = Scorer(self.model_name)
10 | 
11 |     def summarize(
12 |         self,
13 |         batch,
14 |         ratio,
15 |         title: str = "",
16 |         use_tfidf: bool = True,
17 |         use_special_tokens: bool = True,
18 |         use_position: bool = True,
19 |         use_average_lexical_connectivity: bool = True,
20 |         use_content_words_ratio: bool = True,
21 |         use_length: bool = True,
22 |         use_rank: bool = True,
23 |     ):
24 |         return [
25 |             self.model.summarize(
26 |                 text,
27 |                 ratio,
28 |                 title=title,
29 |                 use_tfidf=use_tfidf,
30 |                 use_special_tokens=use_special_tokens,
31 |                 use_position=use_position,
32 |                 use_average_lexical_connectivity=use_average_lexical_connectivity,
33 |                 use_content_words_ratio=use_content_words_ratio,
34 |                 use_length=use_length,
35 |                 use_rank=use_rank,
36 |             )
37 |             for text in batch
38 |         ]
39 | 
40 |     def metadata(self):
41 |         return {"model": self.model_name}
42 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/scores/average_lexical_connectivity.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | 
 3 | import numpy as np
 4 | from sklearn.feature_extraction.text import CountVectorizer
 5 | 
 6 | from .util import filter_tokens, normalize
 7 | 
 8 | 
 9 | def average_lexical_connectivity(sentences):
10 |     """number of terms in a sentence shared with other sentences
11 |     divided by the length of the sentence"""
12 |     analyzer = partial(filter_tokens, use_lemma=True)
13 |     vectorizer = CountVectorizer(binary=True, analyzer=analyzer)
14 |     try:
15 |         matrix = vectorizer.fit_transform(sentences)
16 |     except ValueError:
17 |         return [1] * len(sentences)
18 |     shared_terms = np.asarray(matrix.sum(axis=0)).ravel() > 1
19 |     features = []
20 |     for row_index in range(matrix.shape[0]):
21 |         feature = shared_terms[matrix[row_index].nonzero()[1]].sum()
22 |         feature_len = max(len(analyzer(sentences[row_index])), 1)
23 |         features.append(feature / feature_len)
24 |     return normalize(features)
25 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/scores/content_words_ratio.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from .util import filter_tokens, normalize
 4 | 
 5 | 
 6 | def get_stopwords(sentence):
 7 |     return [token for token in sentence if token.is_stop]
 8 | 
 9 | 
def content_words_ratio(sentences):
    """Normalized share of non-stopword tokens per sentence.

    For each sentence the stopword count is divided by the number of tokens
    left after ``filter_tokens(..., remove_stopwords=False)`` (at least 1);
    one minus that ratio is then passed through ``normalize``.
    """
    stopword_share = []
    for sent in sentences:
        num_stopwords = len(get_stopwords(sent))
        num_words = np.max((len(filter_tokens(sent, remove_stopwords=False)), 1))
        stopword_share.append(num_stopwords / num_words)
    return normalize(1 - np.array(stopword_share))
21 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/scores/length.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from .util import filter_tokens, normalize
 4 | 
 5 | 
 6 | def length_score(sentences):
 7 |     """logarithm of the sentence length"""
 8 |     sentence_length = np.array([len(filter_tokens(sent)) for sent in sentences])
 9 |     scores = normalize(np.log(sentence_length + 3))
10 |     return scores
11 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/scores/position.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy.stats import norm
 3 | 
 4 | 
 5 | def position_score(sentences):
 6 |     """Percentiles of the normal distribution where the percentiles
 7 |     correspond to the position of the sentences in the document and
 8 |     are equally distributed in the interval (0, 1). Sentences closer
 9 |     to the beginning of the document get a higher score."""
10 |     num_sentences = len(sentences)
11 |     percentile = (np.arange(num_sentences) + 1) / (num_sentences + 1)
12 |     scores = norm.isf(percentile)
13 |     return scores
14 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/scores/special_tokens.py:
--------------------------------------------------------------------------------
 1 | from .util import normalize
 2 | 
 3 | ARTICLES = {"a", "an"}
 4 | SPECIAL_POS_TAGS = {"NOUN", "ADJ", "PROPN"}
 5 | 
 6 | 
 7 | def has_special_feature(token):
 8 |     return (
 9 |         token.is_currency
10 |         or token.is_digit
11 |         or (token.text in ARTICLES and token.pos_ == "DET")
12 |         or token.pos_ in SPECIAL_POS_TAGS
13 |         or token.ent_type_ != ""
14 |         # or token.ent_type_ == "DATE"
15 |     )
16 | 
17 | 
def special_token_score(sentences):
    """Normalized ratio of special tokens per sentence.

    A token is special when ``has_special_feature`` accepts it (currency,
    digit, noun, adjective, proper noun, 'a'/'an', or a token with an entity
    type). The count of such tokens is divided by the length of the sentence
    and the ratios are passed through ``normalize``.

    Args:
        sentences: sequence of tokenized sentences (iterables of tokens that
            also support ``len``).

    Returns:
        The normalized ratios, one per sentence.
    """
    features = []
    for sentence in sentences:
        num_special = sum(1 for token in sentence if has_special_feature(token))
        # Guard against empty sentences, like the other scorers do with
        # max(..., 1); an empty sentence simply scores a ratio of 0.
        features.append(num_special / max(len(sentence), 1))
    return normalize(features)
26 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/scores/tfidf.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | 
 3 | import numpy as np
 4 | from sklearn.feature_extraction.text import TfidfVectorizer
 5 | 
 6 | from .util import filter_tokens, normalize
 7 | 
 8 | 
 9 | def tfidf_score(sentences, use_lemma=False):
10 |     """compute the score as the normalization of the sum of the tfidf vector
11 |     entries minus the logarithm of the sentence length"""
12 |     analyzer = partial(filter_tokens, use_lemma=use_lemma)
13 |     vectorizer = TfidfVectorizer(analyzer=analyzer)
14 |     try:
15 |         tfidf = vectorizer.fit_transform(sentences)
16 |     except ValueError:
17 |         return np.ones(len(sentences))
18 |     sentence_lengths = np.array([len(analyzer(s)) for s in sentences])
19 |     tfidf_sum = np.asarray(tfidf.sum(axis=1)).ravel()
20 |     n_tfidf_sum = normalize(tfidf_sum)
21 |     n_log_len = normalize(np.log(sentence_lengths + 3))
22 |     return n_tfidf_sum - n_log_len
23 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/scores/util.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def tokenize(document, nlp):
 5 |     return [sent for sent in nlp(document).sents if sent.text.strip() != ""]
 6 | 
 7 | 
 8 | def filter_tokens(tokens, remove_stopwords=True, use_lemma=False):
 9 |     return [
10 |         token.lemma_.lower() if use_lemma else token.text
11 |         for token in tokens
12 |         if (not remove_stopwords or not token.is_stop)
13 |         and not token.is_punct
14 |         and not token.is_space
15 |     ]
16 | 
17 | 
def normalize(arr):
    """Standardize values to zero mean and unit standard deviation.

    Args:
        arr: array-like of numbers.

    Returns:
        numpy.ndarray: ``(arr - mean) / std`` as floats. When the values have
        (numerically) no spread, e.g. a single value or all values equal, an
        array of zeros is returned instead of dividing by zero, which would
        produce NaN/inf. Empty input yields an empty array.
    """
    arr = np.array(arr, dtype=float)
    if arr.size == 0:
        # mean/std of an empty array are NaN and emit warnings.
        return arr
    centered = arr - np.mean(arr)
    spread = np.std(arr)
    if np.isclose(spread, 0.0):
        # Constant input carries no ranking information: every score is 0.
        return np.zeros_like(centered)
    return centered / spread
21 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/summarizer/scores/word_overlap.py:
--------------------------------------------------------------------------------
 1 | from itertools import chain
 2 | 
 3 | import numpy as np
 4 | 
 5 | from .util import filter_tokens, normalize, tokenize
 6 | 
 7 | 
 8 | class WordOverlap:
 9 |     def __init__(self, words, nlp):
10 |         if isinstance(words, str):
11 |             sentences = tokenize(words, nlp)
12 |             words = list(
13 |                 chain.from_iterable(
14 |                     [filter_tokens(sent, use_lemma=True) for sent in sentences]
15 |                 )
16 |             )
17 |         else:
18 |             words = words.copy()
19 |         self.words = set(words)
20 | 
21 |     def score(self, sentences):
22 |         """number of words shared with the title"""
23 |         features = []
24 |         for sentence in sentences:
25 |             words = set(filter_tokens(sentence, use_lemma=True))
26 |             feature = len(words & self.words)
27 |             features.append(feature)
28 |         features = np.array(features)
29 |         return (features - features.mean()) / (features.std() + 0.3)
30 | 


--------------------------------------------------------------------------------
/summarizers/featuresum/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "FeatureSum"
3 | metadata:
4 |   type: extractive
5 | 


--------------------------------------------------------------------------------
/summarizers/gsum/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | torch = "==1.5.0"
 8 | numpy = "<1.24"
 9 | requests = "*"
10 | tqdm = "*"
11 | regex = "*"
12 | 
13 | [dev-packages]
14 | 
15 | [requires]
16 | python_version = "3.8"  # 3.9 or higher is not supported because of torch 1.5 dependency
17 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Facebook, Inc. and its affiliates.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
# Names exported by `from fairseq import *`.
__all__ = ['pdb']
__version__ = '0.9.0'

import sys
# Put the relative path "../fairseq" at the front of the module search path.
# NOTE(review): a relative sys.path entry is resolved against the current
# working directory, so this presumably only helps when the process is
# started from a sibling directory -- confirm the intended launch location.
sys.path.insert(0,"../fairseq")

# backwards compatibility to support `from fairseq.meters import AverageMeter`
from fairseq.logging import meters, metrics, progress_bar  # noqa
# Alias the logging submodules under their former top-level module names.
sys.modules['fairseq.meters'] = meters
sys.modules['fairseq.metrics'] = metrics
sys.modules['fairseq.progress_bar'] = progress_bar
17 | 
18 | import fairseq.criterions  # noqa
19 | import fairseq.models  # noqa
20 | import fairseq.modules  # noqa
21 | import fairseq.optim  # noqa
22 | import fairseq.optim.lr_scheduler  # noqa
23 | import fairseq.pdb  # noqa
24 | import fairseq.tasks  # noqa
25 | 
26 | import fairseq.benchmark  # noqa
27 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/benchmark/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | # import models/tasks to register them
 7 | from . import (  # noqa
 8 |     dummy_lm,
 9 |     dummy_masked_lm,
10 |     dummy_model,
11 | )
12 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/clib/libbleu/module.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2017-present, Facebook, Inc.
 3 |  * All rights reserved.
 4 |  *
 5 |  * This source code is licensed under the license found in the
 6 |  * LICENSE file in the root directory of this source tree.
 7 |  */
 8 | 
 9 | #include <Python.h>
10 | 
11 | 
12 | static PyMethodDef method_def[] = {
13 |   {NULL, NULL, 0, NULL}
14 | };
15 | 
16 | static struct PyModuleDef module_def = {
17 |    PyModuleDef_HEAD_INIT,
18 |    "libbleu",   /* name of module */
19 |    NULL,     /* module documentation, may be NULL */
20 |    -1,       /* size of per-interpreter state of the module,
21 |                 or -1 if the module keeps state in global variables. */
22 |    method_def
23 | };
24 | 
25 | 
26 | #if PY_MAJOR_VERSION == 2
27 | PyMODINIT_FUNC init_libbleu()
28 | #else
29 | PyMODINIT_FUNC PyInit_libbleu()
30 | #endif
31 | {
32 |   PyObject *m = PyModule_Create(&module_def);
33 |   if (!m) {
34 |     return NULL;
35 |   }
36 |   return m;
37 | }
38 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/clib/libnat_cuda/edit_dist.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2017-present, Facebook, Inc.
 3 |  * All rights reserved.
 4 |  *
 5 |  * This source code is licensed under the license found in the
 6 |  * LICENSE file in the root directory of this source tree.
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <torch/extension.h>
12 | 
// Declarations only; the definitions are not part of this header.

// NOTE(review): judging by the name, presumably computes Levenshtein (edit)
// distance information between `source` and `target` using the given
// lengths -- confirm against the implementation.
torch::Tensor LevenshteinDistanceCuda(
        torch::Tensor source,
        torch::Tensor target,
        torch::Tensor source_length,
        torch::Tensor target_length);

// NOTE(review): presumably derives deletion labels for `source` from the
// edit `operations` -- confirm against the implementation.
torch::Tensor GenerateDeletionLabelCuda(
        torch::Tensor source,
        torch::Tensor operations);

// Returns a pair of tensors.
// NOTE(review): presumably insertion labels plus an accompanying tensor
// derived from `operations` -- confirm what each element of the pair holds.
std::pair<torch::Tensor, torch::Tensor> GenerateInsertionLabelCuda(
        torch::Tensor source,
        torch::Tensor operations);
26 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/criterions/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | from fairseq import registry
10 | from fairseq.criterions.fairseq_criterion import FairseqCriterion
11 | 
12 | 
# Create the registry for criterions; `--criterion` is the option name and
# 'cross_entropy' the default choice.
build_criterion, register_criterion, CRITERION_REGISTRY = registry.setup_registry(
    '--criterion',
    base_class=FairseqCriterion,
    default='cross_entropy',
)


# automatically import any Python files in the criterions/ directory
for file in os.listdir(os.path.dirname(__file__)):
    if file.endswith('.py') and not file.startswith('_'):
        # Strip exactly the trailing '.py'. Searching for the first '.py'
        # would truncate names that contain '.py' earlier in the file name.
        module = file[:-len('.py')]
        importlib.import_module('fairseq.criterions.' + module)
25 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/append_token_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | 
 9 | from . import BaseWrapperDataset
10 | 
11 | 
class AppendTokenDataset(BaseWrapperDataset):
    """Wrapper that appends ``token`` to every item of the wrapped dataset.

    When ``token`` is ``None`` items and sizes are passed through unchanged;
    otherwise every item grows by one element and all reported sizes by one.
    """

    def __init__(self, dataset, token=None):
        super().__init__(dataset)
        self.token = token
        self._sizes = dataset.sizes if token is None else np.array(dataset.sizes) + 1

    def __getitem__(self, idx):
        item = self.dataset[idx]
        if self.token is None:
            return item
        # item.new(...) builds the one-element tensor from `item` itself.
        return torch.cat([item, item.new([self.token])])

    @property
    def sizes(self):
        return self._sizes

    def num_tokens(self, index):
        count = self.dataset.num_tokens(index)
        if self.token is None:
            return count
        count += 1
        return count

    def size(self, index):
        length = self.dataset.size(index)
        if self.token is None:
            return length
        length += 1
        return length
43 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/audio/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/summarizers/gsum/guided_summarization/bart/fairseq/data/audio/__init__.py


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/colorize_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
class ColorizeDataset(BaseWrapperDataset):
    """ Adds 'colors' property to net input that is obtained from the provided color getter for use by models """

    def __init__(self, dataset, color_getter):
        super().__init__(dataset)
        self.color_getter = color_getter

    def collater(self, samples):
        """Collate via the parent class and attach one color per sample.

        An empty collated batch is returned untouched.
        """
        batch = super().collater(samples)
        if not len(batch) > 0:
            return batch
        colors = [self.color_getter(self.dataset, sample["id"]) for sample in samples]
        batch["net_input"]["colors"] = torch.tensor(colors, dtype=torch.long)
        return batch
25 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/encoders/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | import importlib
 8 | import os
 9 | 
10 | from fairseq import registry
11 | 
12 | 
# Registry for tokenizers, selected with `--tokenizer` (no default).
build_tokenizer, register_tokenizer, TOKENIZER_REGISTRY = registry.setup_registry(
    '--tokenizer',
    default=None,
)


# Registry for BPE encoders, selected with `--bpe` (no default).
build_bpe, register_bpe, BPE_REGISTRY = registry.setup_registry(
    '--bpe',
    default=None,
)


# automatically import any Python files in the encoders/ directory
for file in os.listdir(os.path.dirname(__file__)):
    if file.endswith('.py') and not file.startswith('_'):
        # Strip exactly the trailing '.py'. Searching for the first '.py'
        # would truncate names that contain '.py' earlier in the file name.
        module = file[:-len('.py')]
        importlib.import_module('fairseq.data.encoders.' + module)
30 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/encoders/byte_bpe.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | from fairseq import file_utils
 8 | from fairseq.data.encoders import register_bpe
 9 | from fairseq.data.encoders.byte_utils import (byte_encode, smart_byte_decode,
10 |                                               SPACE, SPACE_ESCAPE)
11 | 
12 | 
@register_bpe('byte_bpe')
class ByteBPE(object):
    """BPE encoder registered as 'byte_bpe'.

    Text is byte-encoded first and then segmented with a sentencepiece model.
    """

    @staticmethod
    def add_args(parser):
        # fmt: off
        parser.add_argument('--sentencepiece-model-path', type=str,
                            help='path to sentencepiece model')
        # fmt: on

    def __init__(self, args):
        model_path = file_utils.cached_path(args.sentencepiece_model_path)
        try:
            # sentencepiece is imported lazily so it is only required when
            # this encoder is actually instantiated.
            import sentencepiece as spm
            self.sp = spm.SentencePieceProcessor()
            self.sp.Load(model_path)
        except ImportError:
            raise ImportError('Please install sentencepiece with: pip install sentencepiece')

    def encode(self, x: str) -> str:
        """Byte-encode ``x`` and join its sentencepiece pieces with SPACE."""
        pieces = self.sp.EncodeAsPieces(byte_encode(x))
        return SPACE.join(pieces)

    @staticmethod
    def decode(x: str) -> str:
        """Remove piece separators, restore escaped spaces, decode the bytes."""
        joined = x.replace(SPACE, '')
        restored = joined.replace(SPACE_ESCAPE, SPACE)
        return smart_byte_decode(restored)
39 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/encoders/bytes.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | from fairseq.data.encoders import register_bpe
 8 | from fairseq.data.encoders.byte_utils import (byte_encode, smart_byte_decode,
 9 |                                               SPACE, SPACE_ESCAPE)
10 | 
11 | 
@register_bpe('bytes')
class Bytes(object):
    """Encoder registered as 'bytes': one symbol per byte-encoded character."""

    def __init__(self, args):
        pass

    @staticmethod
    def add_args(parser):
        pass

    @staticmethod
    def encode(x: str) -> str:
        """Byte-encode ``x``, escape spaces and separate all symbols by SPACE."""
        symbols = byte_encode(x).replace(SPACE, SPACE_ESCAPE)
        return SPACE.join(symbols)

    @staticmethod
    def decode(x: str) -> str:
        """Drop the separators, restore escaped spaces and decode the bytes."""
        joined = x.replace(SPACE, '')
        restored = joined.replace(SPACE_ESCAPE, SPACE)
        return smart_byte_decode(restored)
31 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/encoders/characters.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | from fairseq.data.encoders import register_bpe
 8 | 
 9 | SPACE = chr(32)
10 | SPACE_ESCAPE = chr(9601)
11 | 
12 | 
13 | @register_bpe('characters')
14 | class Characters(object):
15 |     def __init__(self, args):
16 |         pass
17 | 
18 |     @staticmethod
19 |     def add_args(parser):
20 |         pass
21 | 
22 |     @staticmethod
23 |     def encode(x: str) -> str:
24 |         escaped = x.replace(SPACE, SPACE_ESCAPE)
25 |         return SPACE.join(list(escaped))
26 | 
27 |     @staticmethod
28 |     def decode(x: str) -> str:
29 |         return x.replace(SPACE, '').replace(SPACE_ESCAPE, SPACE)
30 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/encoders/fastbpe.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq import file_utils
 7 | from fairseq.data.encoders import register_bpe
 8 | 
 9 | 
@register_bpe('fastbpe')
class fastBPE(object):
    """BPE encoder registered as 'fastbpe', backed by the fastBPE package."""

    @staticmethod
    def add_args(parser):
        # fmt: off
        parser.add_argument('--bpe-codes', type=str,
                            help='path to fastBPE BPE')
        # fmt: on

    def __init__(self, args):
        """Load the BPE codes referenced by ``args.bpe_codes``.

        Raises:
            ValueError: if ``args.bpe_codes`` is ``None``.
            ImportError: if the fastBPE package is not installed.
        """
        if args.bpe_codes is None:
            # The message names this encoder; it previously referred to
            # subword_nmt, which sent users looking at the wrong option.
            raise ValueError('--bpe-codes is required for --bpe=fastbpe')
        codes = file_utils.cached_path(args.bpe_codes)
        try:
            # fastBPE is imported lazily so it is only required when this
            # encoder is actually instantiated.
            import fastBPE
            self.bpe = fastBPE.fastBPE(codes)
            self.bpe_symbol = "@@ "
        except ImportError:
            raise ImportError('Please install fastBPE with: pip install fastBPE')

    def encode(self, x: str) -> str:
        """Apply BPE to the single string ``x``."""
        return self.bpe.apply([x])[0]

    def decode(self, x: str) -> str:
        """Remove the continuation markers from ``x``.

        A space is appended first so that a marker at the very end of the
        string also matches ``"@@ "``; trailing whitespace is stripped after.
        """
        return (x + ' ').replace(self.bpe_symbol, '').rstrip()
36 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/encoders/nltk_tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq.data.encoders import register_tokenizer
 7 | 
 8 | 
 9 | @register_tokenizer('nltk')
10 | class NLTKTokenizer(object):
11 | 
12 |     def __init__(self, source_lang=None, target_lang=None):
13 |         try:
14 |             from nltk.tokenize import word_tokenize
15 |             self.word_tokenize = word_tokenize
16 |         except ImportError:
17 |             raise ImportError('Please install nltk with: pip install nltk')
18 | 
19 |     def encode(self, x: str) -> str:
20 |         return ' '.join(self.word_tokenize(x))
21 | 
22 |     def decode(self, x: str) -> str:
23 |         return x
24 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/encoders/space_tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import re
 7 | 
 8 | from fairseq.data.encoders import register_tokenizer
 9 | 
10 | 
@register_tokenizer('space')
class SpaceTokenizer(object):
    """Tokenizer registered as 'space': collapses runs of whitespace."""

    def __init__(self, source_lang=None, target_lang=None):
        # Compiled once; matches one or more whitespace characters.
        self.space_tok = re.compile(r"\s+")

    def encode(self, x: str) -> str:
        """Replace every whitespace run in ``x`` with a single space."""
        collapsed = self.space_tok.sub(' ', x)
        return collapsed

    def decode(self, x: str) -> str:
        """Return ``x`` unchanged."""
        return x
22 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/encoders/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | from fairseq.data import encoders
 8 | 
 9 | 
def get_whole_word_mask(args, dictionary):
    """Build a per-symbol mask marking which dictionary entries start a word.

    Returns ``None`` when no BPE is configured for ``args``. Otherwise a
    ``torch.ByteTensor`` with one entry per dictionary index is returned; an
    entry is truthy when the index is below ``dictionary.nspecial``, the
    symbol starts with 'madeupword', the BPE reports a word beginning, or
    the BPE raises ``ValueError`` for the symbol.
    """
    bpe = encoders.build_bpe(args)
    if bpe is None:
        return None

    def starts_word(index):
        if index < dictionary.nspecial:
            # special elements are always considered beginnings
            return True
        symbol = dictionary[index]
        if symbol.startswith('madeupword'):
            return True
        try:
            return bpe.is_beginning_of_word(symbol)
        except ValueError:
            return True

    flags = [starts_word(index) for index in range(len(dictionary))]
    return torch.ByteTensor(flags)
29 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/id_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | from . import FairseqDataset
 9 | 
10 | 
class IdDataset(FairseqDataset):
    """Dataset whose item at ``index`` is the index itself."""

    def __getitem__(self, index):
        return index

    def __len__(self):
        # Always reports 0.
        # NOTE(review): presumably the effective length is supplied by the
        # datasets this one is combined with -- confirm against callers.
        return 0

    def collater(self, samples):
        # Packs the collected indices into a single tensor.
        return torch.tensor(samples)
21 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/legacy/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .masked_lm_dictionary import BertDictionary, MaskedLMDictionary
 7 | from .block_pair_dataset import BlockPairDataset
 8 | from .masked_lm_dataset import MaskedLMDataset
 9 | 
10 | __all__ = [
11 |     'BertDictionary',
12 |     'BlockPairDataset',
13 |     'MaskedLMDataset',
14 |     'MaskedLMDictionary',
15 | ]
16 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/list_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import BaseWrapperDataset
 7 | 
 8 | 
 9 | class ListDataset(BaseWrapperDataset):
10 | 
11 |     def __init__(self, dataset, sizes=None):
12 |         super().__init__(dataset)
13 |         self._sizes = sizes
14 | 
15 |     def __iter__(self):
16 |         for x in self.dataset:
17 |             yield x
18 | 
19 |     def collater(self, samples):
20 |         return samples
21 | 
22 |     @property
23 |     def sizes(self):
24 |         return self._sizes
25 | 
26 |     def num_tokens(self, index):
27 |         return self.sizes[index]
28 | 
29 |     def size(self, index):
30 |         return self.sizes[index]
31 | 
32 |     def set_epoch(self, epoch):
33 |         pass
34 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/lru_cache_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from functools import lru_cache
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
class LRUCacheDataset(BaseWrapperDataset):
    """Wrapper that memoizes item lookups and collation of the wrapped dataset."""

    def __init__(self, dataset, token=None):
        # `token` is accepted but not used anywhere in this class.
        super().__init__(dataset)

    # Results are cached per (self, index); at most 8 entries are kept.
    @lru_cache(maxsize=8)
    def __getitem__(self, index):
        return self.dataset[index]

    # lru_cache hashes its arguments, so `samples` has to be hashable here.
    # NOTE(review): a plain list of samples would raise TypeError -- confirm
    # what callers pass.
    @lru_cache(maxsize=8)
    def collater(self, samples):
        return self.dataset.collater(samples)
23 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/num_samples_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import FairseqDataset
 7 | 
 8 | 
class NumSamplesDataset(FairseqDataset):
    """Dataset yielding 1 for every index, so collation counts the samples."""

    def __getitem__(self, index):
        return 1

    def __len__(self):
        # Always reports 0.
        # NOTE(review): presumably the effective length is supplied by the
        # datasets this one is combined with -- confirm against callers.
        return 0

    def collater(self, samples):
        # Each sample contributes 1, so the sum is the number of samples.
        return sum(samples)
19 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/numel_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | 
 9 | from . import BaseWrapperDataset
10 | 
11 | 
class NumelDataset(BaseWrapperDataset):
    """Expose the element count of each item of the wrapped dataset.

    Args:
        dataset: dataset whose items are measured
        reduce (bool): when true, ``collater`` returns the sum of the counts;
            otherwise it returns them as a tensor
    """

    def __init__(self, dataset, reduce=False):
        super().__init__(dataset)
        self.reduce = reduce

    def __getitem__(self, index):
        """Return the number of elements of ``self.dataset[index]``."""
        item = self.dataset[index]
        # Tensors are counted by torch, anything else by numpy.
        return torch.numel(item) if torch.is_tensor(item) else np.size(item)

    def __len__(self):
        return len(self.dataset)

    def collater(self, samples):
        """Combine per-item counts into a sum or a tensor, depending on ``reduce``."""
        if not self.reduce:
            return torch.tensor(samples)
        return sum(samples)
33 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/offset_tokens_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import BaseWrapperDataset
 7 | 
 8 | 
 9 | class OffsetTokensDataset(BaseWrapperDataset):
10 | 
11 |     def __init__(self, dataset, offset):
12 |         super().__init__(dataset)
13 |         self.offset = offset
14 | 
15 |     def __getitem__(self, idx):
16 |         return self.dataset[idx] + self.offset
17 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/pad_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq.data import data_utils
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
class PadDataset(BaseWrapperDataset):
    """Batch items of the wrapped dataset with ``data_utils.collate_tokens``.

    Args:
        dataset: dataset to wrap
        pad_idx: padding value forwarded to ``collate_tokens``
        left_pad: forwarded to ``collate_tokens`` as its ``left_pad`` keyword
    """

    def __init__(self, dataset, pad_idx, left_pad):
        super().__init__(dataset)
        self.pad_idx = pad_idx
        self.left_pad = left_pad

    def collater(self, samples):
        """Collate ``samples`` using the configured pad index and pad side."""
        padded = data_utils.collate_tokens(
            samples,
            self.pad_idx,
            left_pad=self.left_pad,
        )
        return padded
20 | 
21 | 
class LeftPadDataset(PadDataset):
    """``PadDataset`` with ``left_pad`` fixed to ``True``."""

    def __init__(self, dataset, pad_idx):
        pad_on_left = True
        super().__init__(dataset, pad_idx, left_pad=pad_on_left)
26 | 
27 | 
class RightPadDataset(PadDataset):
    """``PadDataset`` with ``left_pad`` fixed to ``False``."""

    def __init__(self, dataset, pad_idx):
        pad_on_left = False
        super().__init__(dataset, pad_idx, left_pad=pad_on_left)
32 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/prepend_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | 
 9 | from . import BaseWrapperDataset
10 | 
11 | 
class PrependDataset(BaseWrapperDataset):
    """Overwrite the first element of each item with a per-item value.

    Args:
        dataset: dataset to wrap; items are either a sequence or a tuple whose
            first member is the sequence to modify
        prepend_getter: callable ``(dataset, idx) -> int`` giving the value
            written to position 0
        ensure_first_token_is: if not ``None``, the value position 0 must hold
            before it is overwritten (checked with ``assert``)
    """

    def __init__(self, dataset, prepend_getter, ensure_first_token_is=None):
        super().__init__(dataset)
        self.prepend_getter = prepend_getter
        self.ensure_first_token = ensure_first_token_is

    def __getitem__(self, idx):
        sample = self.dataset[idx]
        if isinstance(sample, tuple):
            src, trailing = sample[0], sample[1:]
        else:
            src, trailing = sample, None

        expected_first = self.ensure_first_token
        assert expected_first is None or src[0] == expected_first
        prepend_idx = self.prepend_getter(self.dataset, idx)
        assert isinstance(prepend_idx, int)
        # In-place write: position 0 of the source sequence is replaced.
        src[0] = prepend_idx

        if trailing is None:
            return src
        return (src,) + trailing
29 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/prepend_token_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | 
 9 | from . import BaseWrapperDataset
10 | 
11 | 
class PrependTokenDataset(BaseWrapperDataset):
    """Prepend a fixed token to every item of the wrapped dataset.

    When ``token`` is ``None`` the wrapper is a pass-through; otherwise each
    item grows by one element and all reported sizes grow by one.
    """

    def __init__(self, dataset, token=None):
        super().__init__(dataset)
        self.token = token
        if token is None:
            self._sizes = dataset.sizes
        else:
            # One extra position per item for the prepended token.
            self._sizes = np.array(dataset.sizes) + 1

    def __getitem__(self, idx):
        item = self.dataset[idx]
        if self.token is None:
            return item
        # ``item.new`` builds the one-element prefix from ``item`` itself.
        prefix = item.new([self.token])
        return torch.cat([prefix, item])

    @property
    def sizes(self):
        """Sizes computed at construction time."""
        return self._sizes

    def num_tokens(self, index):
        """Token count of the wrapped item, plus one if a token is prepended."""
        n = self.dataset.num_tokens(index)
        if self.token is None:
            return n
        n += 1
        return n

    def size(self, index):
        """Size of the wrapped item, plus one if a token is prepended."""
        n = self.dataset.size(index)
        if self.token is None:
            return n
        n += 1
        return n
43 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/raw_label_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | from . import FairseqDataset
 9 | 
10 | 
class RawLabelDataset(FairseqDataset):
    """Dataset view over an in-memory, indexable collection of labels.

    Args:
        labels: indexable collection supporting ``len()``
    """

    def __init__(self, labels):
        super().__init__()
        self.labels = labels

    def __getitem__(self, index):
        """Return the label stored at ``index``."""
        label = self.labels[index]
        return label

    def __len__(self):
        """Number of stored labels."""
        return len(self.labels)

    def collater(self, samples):
        """Pack a list of labels into a single tensor."""
        batch = torch.tensor(samples)
        return batch
25 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/replace_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import BaseWrapperDataset
 7 | 
 8 | 
 9 | class ReplaceDataset(BaseWrapperDataset):
10 |     """Replaces tokens found in the dataset by a specified replacement token
11 | 
12 |         Args:
13 |             dataset (~torch.utils.data.Dataset): dataset to replace tokens in
14 |             replace_map(Dictionary[int,int]): map of token to replace -> replacement token
15 |             offsets (List[int]): do not replace tokens before (from left if pos, right if neg) this offset. should be
16 |             as many as the number of objects returned by the underlying dataset __getitem__ method.
17 |         """
18 | 
19 |     def __init__(self, dataset, replace_map, offsets):
20 |         super().__init__(dataset)
21 |         assert len(replace_map) > 0
22 |         self.replace_map = replace_map
23 |         self.offsets = offsets
24 | 
25 |     def __getitem__(self, index):
26 |         item = self.dataset[index]
27 |         is_tuple = isinstance(item, tuple)
28 |         srcs = item if is_tuple else [item]
29 | 
30 |         for offset, src in zip(self.offsets, srcs):
31 |             for k, v in self.replace_map.items():
32 |                 src_off = src[offset:] if offset >= 0 else src[:offset]
33 |                 src_off.masked_fill_(src_off == k, v)
34 | 
35 |         item = srcs if is_tuple else srcs[0]
36 |         return item
37 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/roll_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
class RollDataset(BaseWrapperDataset):
    """Apply ``torch.roll`` with a fixed shift to every item.

    Args:
        dataset: dataset to wrap
        shifts: shift argument passed to ``torch.roll``
    """

    def __init__(self, dataset, shifts):
        super().__init__(dataset)
        self.shifts = shifts

    def __getitem__(self, index):
        """Return ``self.dataset[index]`` rolled by ``self.shifts``."""
        rolled = torch.roll(self.dataset[index], self.shifts)
        return rolled
20 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/sort_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
class SortDataset(BaseWrapperDataset):
    """Order a dataset by one or more externally supplied sort keys.

    Args:
        dataset: dataset to wrap
        sort_order: a single key sequence, or a list/tuple of key sequences,
            each with one entry per dataset item
    """

    def __init__(self, dataset, sort_order):
        super().__init__(dataset)
        # Normalise a single key sequence to a one-element list.
        if isinstance(sort_order, (list, tuple)):
            self.sort_order = sort_order
        else:
            self.sort_order = [sort_order]

        expected_len = len(dataset)
        assert all(len(so) == expected_len for so in self.sort_order)

    def ordered_indices(self):
        """Return indices ordered by ``np.lexsort`` over the sort keys."""
        return np.lexsort(self.sort_order)
23 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/strip_token_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import BaseWrapperDataset
 7 | 
 8 | 
 9 | class StripTokenDataset(BaseWrapperDataset):
10 | 
11 |     def __init__(self, dataset, id_to_strip):
12 |         super().__init__(dataset)
13 |         self.id_to_strip = id_to_strip
14 | 
15 |     def __getitem__(self, index):
16 |         item = self.dataset[index]
17 |         return item[item.ne(self.id_to_strip)]
18 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/data/truncate_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
class TruncateDataset(BaseWrapperDataset):
    """Cap the length of every item at ``truncation_length``.

    Args:
        dataset: dataset to wrap
        truncation_length: maximum number of leading elements kept per item;
            must not be ``None``
    """

    def __init__(self, dataset, truncation_length):
        super().__init__(dataset)
        assert truncation_length is not None
        self.truncation_length = truncation_length
        self.dataset = dataset

    def __getitem__(self, index):
        """Return the item, cut to its first ``truncation_length`` elements."""
        item = self.dataset[index]
        limit = self.truncation_length
        if item.size(0) > limit:
            return item[:limit]
        return item

    @property
    def sizes(self):
        """Element-wise minimum of the wrapped sizes and the truncation length."""
        return np.minimum(self.dataset.sizes, self.truncation_length)

    def __len__(self):
        return len(self.dataset)
32 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/summarizers/gsum/guided_summarization/bart/fairseq/logging/__init__.py


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/model_parallel/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from . import criterions, modules, models  # noqa
7 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/model_parallel/criterions/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | 
# Import every public ``*.py`` file that sits next to this ``__init__``;
# names starting with an underscore are skipped.
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    # Module name is the part of the filename before the first '.py'.
    module = file.partition('.py')[0]
    importlib.import_module('fairseq.model_parallel.criterions.' + module)
15 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/model_parallel/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | 
# Import every public ``*.py`` file that sits next to this ``__init__``;
# names starting with an underscore are skipped.
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    # Module name is the part of the filename before the first '.py'.
    model_name = file.partition('.py')[0]
    importlib.import_module('fairseq.model_parallel.models.' + model_name)
14 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/model_parallel/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .multihead_attention import ModelParallelMultiheadAttention
 7 | from .transformer_layer import ModelParallelTransformerEncoderLayer, ModelParallelTransformerDecoderLayer
 8 | 
 9 | __all__ = [
10 |     'ModelParallelMultiheadAttention',
11 |     'ModelParallelTransformerEncoderLayer',
12 |     'ModelParallelTransformerDecoderLayer',
13 | ]
14 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/models/bart/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .hub_interface import *  # noqa
 7 | from .guided_hub_interface import *  # noqa
 8 | from .model import *  # noqa
 9 | from .guided_model import *  # noqa
10 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/models/huggingface/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .hf_gpt2 import *  # noqa
7 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/models/nat/__init__.py:
--------------------------------------------------------------------------------
1 | from .fairseq_nat_model import *
2 | from .nonautoregressive_transformer import *
3 | from .nat_crf_transformer import *
4 | from .iterative_nonautoregressive_transformer import *
5 | from .cmlm_transformer import *
6 | from .levenshtein_transformer import *
7 | from .insertion_transformer import *
8 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/models/roberta/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .hub_interface import *  # noqa
 7 | from .model import *  # noqa
 8 | from .model_camembert import *  # noqa
 9 | from .model_xlmr import *  # noqa
10 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/models/roberta/model_camembert.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | CamemBERT: a Tasty French Language Model
 7 | """
 8 | 
 9 | from fairseq.models import register_model
10 | 
11 | from .hub_interface import RobertaHubInterface
12 | from .model import RobertaModel
13 | 
14 | 
@register_model('camembert')
class CamembertModel(RobertaModel):
    """``RobertaModel`` subclass registered under the name ``camembert``."""

    @classmethod
    def hub_models(cls):
        """Return the mapping from hub model name to archive URL."""
        # Served over HTTPS: the archive supplies the checkpoint that
        # ``from_pretrained`` loads, so it should not be fetched over an
        # unauthenticated plain-HTTP connection.
        return {
            'camembert.v0': 'https://dl.fbaipublicfiles.com/fairseq/models/camembert.v0.tar.gz',
        }

    @classmethod
    def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='sentencepiece', **kwargs):
        """Load a pretrained model and wrap it in a ``RobertaHubInterface``.

        Args:
            model_name_or_path: key of ``hub_models()`` or a path, forwarded
                to ``hub_utils.from_pretrained``
            checkpoint_file: checkpoint filename (default ``'model.pt'``)
            data_name_or_path: data location (default ``'.'``)
            bpe: BPE scheme name (default ``'sentencepiece'``)
            **kwargs: forwarded unchanged to ``hub_utils.from_pretrained``

        Returns:
            RobertaHubInterface built from the loaded args, task and the
            first loaded model.
        """
        # Imported lazily, inside the method, as in the original code.
        from fairseq import hub_utils
        x = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            bpe=bpe,
            load_checkpoint_heads=True,
            **kwargs,
        )
        return RobertaHubInterface(x['args'], x['task'], x['models'][0])
37 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/models/roberta/model_xlmr.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | Unsupervised Cross-lingual Representation Learning at Scale
 7 | """
 8 | 
 9 | from fairseq.models import register_model
10 | 
11 | from .hub_interface import RobertaHubInterface
12 | from .model import RobertaModel
13 | 
14 | 
@register_model('xlmr')
class XLMRModel(RobertaModel):
    """``RobertaModel`` subclass registered under the name ``xlmr``."""

    @classmethod
    def hub_models(cls):
        """Return the mapping from hub model name to archive URL."""
        # Served over HTTPS: the archives supply the checkpoints that
        # ``from_pretrained`` loads, so they should not be fetched over an
        # unauthenticated plain-HTTP connection.
        return {
            'xlmr.base': 'https://dl.fbaipublicfiles.com/fairseq/models/xlmr.base.tar.gz',
            'xlmr.large': 'https://dl.fbaipublicfiles.com/fairseq/models/xlmr.large.tar.gz',
        }

    @classmethod
    def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='sentencepiece', **kwargs):
        """Load a pretrained model and wrap it in a ``RobertaHubInterface``.

        Args:
            model_name_or_path: key of ``hub_models()`` or a path, forwarded
                to ``hub_utils.from_pretrained``
            checkpoint_file: checkpoint filename (default ``'model.pt'``)
            data_name_or_path: data location (default ``'.'``)
            bpe: BPE scheme name (default ``'sentencepiece'``)
            **kwargs: forwarded unchanged to ``hub_utils.from_pretrained``

        Returns:
            RobertaHubInterface built from the loaded args, task and the
            first loaded model.
        """
        # Imported lazily, inside the method, as in the original code.
        from fairseq import hub_utils
        x = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            bpe=bpe,
            load_checkpoint_heads=True,
            **kwargs,
        )
        return RobertaHubInterface(x['args'], x['task'], x['models'][0])
38 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/conv_tbc.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | from torch.nn.modules.utils import _single
 8 | 
 9 | 
class ConvTBC(torch.nn.Module):
    """1D convolution over an input of shape (time x batch x channel)

    The implementation uses gemm to perform the convolution. This implementation
    is faster than cuDNN for small kernel sizes.

    The weight and bias parameters are allocated here but not initialised;
    callers are expected to fill them.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=0):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        # Stored as 1-tuples, following torch's ``_single`` helper.
        self.kernel_size = _single(kernel_size)
        self.padding = _single(padding)

        kernel_width = self.kernel_size[0]
        weight_storage = torch.Tensor(kernel_width, in_channels, out_channels)
        bias_storage = torch.Tensor(out_channels)
        self.weight = torch.nn.Parameter(weight_storage)
        self.bias = torch.nn.Parameter(bias_storage)

    def forward(self, input):
        """Run ``torch.conv_tbc`` on a contiguous copy/view of ``input``."""
        pad = self.padding[0]
        return torch.conv_tbc(input.contiguous(), self.weight, self.bias, pad)

    def __repr__(self):
        pieces = [
            '{name}({in_channels}, {out_channels}, kernel_size={kernel_size}',
            ', padding={padding}',
        ]
        if self.bias is None:
            pieces.append(', bias=False')
        pieces.append(')')
        template = ''.join(pieces)
        return template.format(name=self.__class__.__name__, **self.__dict__)
37 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/dynamicconv_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .dynamicconv_layer import DynamicconvLayer  # noqa
7 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/dynamicconv_layer/dynamiconv_cpu.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/torch.h>
 2 | #include <vector>
 3 | 
 4 | std::vector<float*> dynamicconv_cpu_forward(
 5 |     float* input,
 6 |     float* filters,
 7 |     int padding_l);
 8 | 
 9 | std::vector<float*> dynamicconv_cpu_backward(
10 |     float* gradOutput,
11 |     int padding_l,
12 |     float* input,
13 |     float* filters);
14 | 
15 | std::vector<float*> dynamicconv_forward(
16 |     float* input,
17 |     float* filters,
18 |     int padding_l) {
19 | 
20 |     return dynamicconv_cpu_forward(input, filters, padding_l);
21 | }
22 | 
23 | std::vector<float*> dynamicconv_backward(
24 |     float* gradOutput,
25 |     int padding_l,
26 |     float* input,
27 |     float* filters) {
28 | 
29 |     return dynamicconv_cpu_backward(gradOutput, padding_l, input, filters);
30 | }
31 | 
// Python bindings: registers the two wrappers above as "forward" and
// "backward" on the extension module named by TORCH_EXTENSION_NAME.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("forward", &dynamicconv_forward, "dynamicconv forward (CPU)");
    m.def("backward", &dynamicconv_backward, "dynamicconv backward (CPU)");
}
36 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/dynamicconv_layer/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from setuptools import setup
 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension
 9 | 
# CUDA extension built from the C++ binding file and the CUDA kernel file.
dynamicconv_extension = CUDAExtension(
    name='dynamicconv_cuda',
    sources=[
        'dynamicconv_cuda.cpp',
        'dynamicconv_cuda_kernel.cu',
    ],
)

setup(
    name='dynamicconv_layer',
    ext_modules=[dynamicconv_extension],
    # torch's BuildExtension drives the build_ext step.
    cmdclass={'build_ext': BuildExtension},
)
24 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/fp32_group_norm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | Group norm done in fp32 (for fp16 training)
 7 | """
 8 | 
 9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 | 
12 | 
class Fp32GroupNorm(nn.GroupNorm):
    """``nn.GroupNorm`` whose computation is carried out in float32.

    The input and the affine parameters (when present) are cast with
    ``.float()`` before ``F.group_norm`` is called, and the result is cast
    back to the type of the input.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        weight = None if self.weight is None else self.weight.float()
        bias = None if self.bias is None else self.bias.float()
        normed = F.group_norm(input.float(), self.num_groups, weight, bias, self.eps)
        return normed.type_as(input)
26 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/gelu.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | See "Gaussian Error Linear Units (GELUs)" by Dan Hendrycks and Kevin Gimpel with
 7 | the corresponding GitHub repo: https://github.com/hendrycks/GELUs
 8 | """
 9 | 
10 | import math
11 | 
12 | import torch
13 | import torch.nn as nn
14 | 
15 | 
def gelu_accurate(x):
    """GELU computed with the tanh-based approximation formula.

    The constant ``sqrt(2 / pi)`` is computed on first use and stored on the
    function object as ``gelu_accurate._a``.
    """
    if not hasattr(gelu_accurate, "_a"):
        gelu_accurate._a = math.sqrt(2 / math.pi)
    inner = gelu_accurate._a * (x + 0.044715 * torch.pow(x, 3))
    return 0.5 * x * (1 + torch.tanh(inner))
22 | 
23 | 
def gelu(x: torch.Tensor) -> torch.Tensor:
    """GELU activation.

    Uses ``torch.nn.functional.gelu`` (evaluated on a float32 copy and cast
    back to the type of ``x``) when torch provides it; otherwise falls back
    to the erf-based formula applied to ``x`` directly.
    """
    if not hasattr(torch.nn.functional, "gelu"):
        return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
    activated = torch.nn.functional.gelu(x.float())
    return activated.type_as(x)
29 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/grad_multiply.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | 
 9 | class GradMultiply(torch.autograd.Function):
10 |     @staticmethod
11 |     def forward(ctx, x, scale):
12 |         ctx.scale = scale
13 |         res = x.new(x)
14 |         return res
15 | 
16 |     @staticmethod
17 |     def backward(ctx, grad):
18 |         return grad * ctx.scale, None
19 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/layer_norm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | import torch.nn as nn
 8 | import torch.nn.functional as F
 9 | 
10 | 
# apex is optional: when its fused layer norm can be imported, wrap it in a
# subclass whose forward is marked ``torch.jit.unused``; otherwise only record
# that it is unavailable.
try:
    from apex.normalization import FusedLayerNorm as _FusedLayerNorm
except ImportError:
    has_fused_layernorm = False
else:
    has_fused_layernorm = True

    class FusedLayerNorm(_FusedLayerNorm):
        @torch.jit.unused
        def forward(self, x):
            return super().forward(x)
24 | 
25 | 
def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False):
    """Build a layer-norm module, using the fused variant when possible.

    ``torch.nn.LayerNorm`` is returned when ``export`` is true, when CUDA is
    not available, or when the fused implementation could not be imported;
    otherwise ``FusedLayerNorm`` is returned.
    """
    if export or not torch.cuda.is_available() or not has_fused_layernorm:
        return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine)
    return FusedLayerNorm(normalized_shape, eps, elementwise_affine)
30 | 
31 | 
class Fp32LayerNorm(nn.LayerNorm):
    """``nn.LayerNorm`` whose computation is carried out in float32.

    The input and the affine parameters (when present) are cast with
    ``.float()`` before ``F.layer_norm`` is called, and the result is cast
    back to the type of the input.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        weight = None if self.weight is None else self.weight.float()
        bias = None if self.bias is None else self.bias.float()
        normed = F.layer_norm(input.float(), self.normalized_shape, weight, bias, self.eps)
        return normed.type_as(input)
45 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/lightconv_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .lightconv_layer import LightconvLayer  # noqa
7 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/lightconv_layer/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from setuptools import setup
 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension
 9 | 
# CUDA extension built from the C++ binding file and the CUDA kernel file.
lightconv_extension = CUDAExtension('lightconv_cuda', [
    'lightconv_cuda.cpp',
    'lightconv_cuda_kernel.cu',
])

setup(
    name='lightconv_layer',
    ext_modules=[lightconv_extension],
    # torch's BuildExtension drives the build_ext step.
    cmdclass={'build_ext': BuildExtension},
)
21 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/positional_embedding.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch.nn as nn
 7 | from .learned_positional_embedding import LearnedPositionalEmbedding
 8 | from .sinusoidal_positional_embedding import SinusoidalPositionalEmbedding
 9 | 
10 | 
def PositionalEmbedding(
        num_embeddings: int,
        embedding_dim: int,
        padding_idx: int,
        learned: bool = False,
):
    """Build a positional embedding module.

    Args:
        num_embeddings: number of positions to embed.
        embedding_dim: dimensionality of each position vector.
        padding_idx: index of the padding symbol; position ids are offset
            by it (see the size adjustments below).
        learned: when True build a LearnedPositionalEmbedding, otherwise a
            SinusoidalPositionalEmbedding.

    Returns:
        The constructed embedding module.
    """
    if not learned:
        # NOTE(review): unlike the learned branch, this path does arithmetic
        # on padding_idx unconditionally, so it must not be None here.
        return SinusoidalPositionalEmbedding(
            embedding_dim, padding_idx, init_size=num_embeddings + padding_idx + 1,
        )

    has_padding = padding_idx is not None
    # if padding_idx is specified then offset the embedding ids by
    # this index and adjust num_embeddings appropriately
    # TODO: The right place for this offset would be inside
    # LearnedPositionalEmbedding. Move this there for a cleaner implementation.
    if has_padding:
        num_embeddings = num_embeddings + padding_idx + 1
    embedding = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx)
    nn.init.normal_(embedding.weight, mean=0, std=embedding_dim ** -0.5)
    if has_padding:
        # The padding row is zeroed after the random initialisation.
        nn.init.constant_(embedding.weight[padding_idx], 0)
    return embedding
33 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/scalar_bias.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | #
 6 | 
 7 | import torch
 8 | 
 9 | 
class ScalarBias(torch.autograd.Function):
    """
    Adds a vector of scalars, used in self-attention mechanism to allow
    the model to optionally attend to this vector instead of the past
    """

    @staticmethod
    def forward(ctx, input, dim, bias_init):
        """Return ``input`` grown by one slot along ``dim``.

        Slot 0 along ``dim`` holds ``bias_init``; the remaining slots hold a
        copy of ``input``.
        """
        # Remember the axis so backward can drop the extra slot again.
        ctx.dim = dim
        out_shape = list(input.size())
        out_shape[dim] += 1
        output = input.new(*out_shape).fill_(bias_init)
        output.narrow(dim, 1, out_shape[dim] - 1).copy_(input)
        return output

    @staticmethod
    def backward(ctx, grad):
        """Return the gradient for ``input``; ``dim`` and ``bias_init`` get None."""
        kept = grad.size(ctx.dim) - 1
        # Skip index 0 along the biased axis: that slot was the constant bias.
        grad_input = grad.narrow(ctx.dim, 1, kept)
        return grad_input, None, None
28 | 
29 | 
def scalar_bias(input, dim, bias_init=0):
    """Functional wrapper around ``ScalarBias``.

    Returns ``input`` with one extra leading slot along ``dim`` filled with
    ``bias_init`` (0 by default).
    """
    biased = ScalarBias.apply(input, dim, bias_init)
    return biased
32 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/modules/unfold.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch.nn.functional as F
 7 | 
 8 | 
 9 | def unfold1d(x, kernel_size, padding_l, pad_value=0):
10 |     '''unfold T x B x C to T x B x C x K'''
11 |     if kernel_size > 1:
12 |         T, B, C = x.size()
13 |         x = F.pad(x, (0, 0, 0, 0, padding_l, kernel_size - 1 - padding_l), value=pad_value)
14 |         x = x.as_strided((T, B, C, kernel_size), (B*C, C, 1, B*C))
15 |     else:
16 |         x = x.unsqueeze(3)
17 |     return x
18 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/optim/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | from fairseq import registry
10 | from fairseq.optim.fairseq_optimizer import FairseqOptimizer
11 | from fairseq.optim.fp16_optimizer import FP16Optimizer, MemoryEfficientFP16Optimizer
12 | from fairseq.optim.bmuf import FairseqBMUF  # noqa
13 | 
14 | 
# Names re-exported by `from fairseq.optim import *`.
__all__ = [
    'FairseqOptimizer',
    'FP16Optimizer',
    'MemoryEfficientFP16Optimizer',
]


# Create the optimizer registry bound to the `--optimizer` option. The call
# yields a builder, a registration decorator and the registry itself; 'nag'
# is passed as the default choice.
build_optimizer, register_optimizer, OPTIMIZER_REGISTRY = registry.setup_registry(
    '--optimizer',
    base_class=FairseqOptimizer,
    default='nag',
)


# automatically import any Python files in the optim/ directory
# (presumably so that their @register_optimizer decorators run at import
# time -- confirm against registry.setup_registry)
for file in os.listdir(os.path.dirname(__file__)):
    # Files starting with '_' (such as this __init__.py) are skipped.
    if file.endswith('.py') and not file.startswith('_'):
        # Module name = everything before the first '.py' in the file name.
        module = file[:file.find('.py')]
        importlib.import_module('fairseq.optim.' + module)
34 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/optim/adagrad.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch.optim
 7 | 
 8 | from . import FairseqOptimizer, register_optimizer
 9 | 
10 | 
@register_optimizer('adagrad')
class Adagrad(FairseqOptimizer):
    """Fairseq wrapper around ``torch.optim.Adagrad``, registered as 'adagrad'."""

    def __init__(self, args, params):
        super().__init__(args)
        config = self.optimizer_config
        self._optimizer = torch.optim.Adagrad(params, **config)

    @staticmethod
    def add_args(parser):
        """Add optimizer-specific arguments to the parser."""
        # fmt: off
        parser.add_argument(
            '--weight-decay', '--wd',
            default=0.0, type=float, metavar='WD', help='weight decay',
        )
        # fmt: on

    @property
    def optimizer_config(self):
        """
        Return a kwarg dictionary that will be used to override optimizer
        args stored in checkpoints. This allows us to load a checkpoint and
        resume training using a different set of optimizer args, e.g., with a
        different learning rate.
        """
        # Only the first entry of the learning-rate list is used.
        learning_rate = self.args.lr[0]
        return dict(lr=learning_rate, weight_decay=self.args.weight_decay)

    @property
    def supports_flat_params(self):
        """This optimizer reports support for flat parameters."""
        return True
41 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/optim/lr_scheduler/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | from fairseq import registry
10 | from fairseq.optim.lr_scheduler.fairseq_lr_scheduler import FairseqLRScheduler
11 | 
12 | 
# Create the LR-scheduler registry bound to the `--lr-scheduler` option. The
# call yields a builder, a registration decorator and the registry itself;
# 'fixed' is passed as the default choice.
build_lr_scheduler, register_lr_scheduler, LR_SCHEDULER_REGISTRY = registry.setup_registry(
    '--lr-scheduler',
    base_class=FairseqLRScheduler,
    default='fixed',
)

# automatically import any Python files in the optim/lr_scheduler/ directory
# (presumably so that their @register_lr_scheduler decorators run at import
# time -- confirm against registry.setup_registry)
for file in os.listdir(os.path.dirname(__file__)):
    # Files starting with '_' (such as this __init__.py) are skipped.
    if file.endswith('.py') and not file.startswith('_'):
        # Module name = everything before the first '.py' in the file name.
        module = file[:file.find('.py')]
        importlib.import_module('fairseq.optim.lr_scheduler.' + module)
24 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/optim/lr_scheduler/fairseq_lr_scheduler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .. import FairseqOptimizer
 7 | 
 8 | 
 9 | class FairseqLRScheduler(object):
10 | 
11 |     def __init__(self, args, optimizer):
12 |         super().__init__()
13 |         if not isinstance(optimizer, FairseqOptimizer):
14 |             raise ValueError('optimizer must be an instance of FairseqOptimizer')
15 |         self.args = args
16 |         self.optimizer = optimizer
17 |         self.best = None
18 | 
19 |     @staticmethod
20 |     def add_args(parser):
21 |         """Add arguments to the parser for this LR scheduler."""
22 |         pass
23 | 
24 |     def state_dict(self):
25 |         """Return the LR scheduler state dict."""
26 |         return {'best': self.best}
27 | 
28 |     def load_state_dict(self, state_dict):
29 |         """Load an LR scheduler state dict."""
30 |         self.best = state_dict['best']
31 | 
32 |     def step(self, epoch, val_loss=None):
33 |         """Update the learning rate at the end of the given epoch."""
34 |         if val_loss is not None:
35 |             if self.best is None:
36 |                 self.best = val_loss
37 |             else:
38 |                 self.best = min(self.best, val_loss)
39 | 
40 |     def step_update(self, num_updates):
41 |         """Update the learning rate after each update."""
42 |         return self.optimizer.get_lr()
43 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/pdb.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import multiprocessing
 7 | import os
 8 | import pdb
 9 | import sys
10 | 
11 | 
# Only `set_trace` is exported by `from ... import *`.
__all__ = ['set_trace']


# One-element list used as a mutable cache for the re-opened stdin file
# object (filled in by MultiprocessingPdb._cmdloop).
_stdin = [None]
# Lock held while a debugger command loop is running.
_stdin_lock = multiprocessing.Lock()
# File descriptor of stdin captured at import time, or None if it cannot be
# obtained (sys.stdin.fileno() raised).
try:
    _stdin_fd = sys.stdin.fileno()
except Exception:
    _stdin_fd = None
21 | 
22 | 
class MultiprocessingPdb(pdb.Pdb):
    """A Pdb wrapper that works in a multiprocessing environment.

    Usage: `from fairseq import pdb; pdb.set_trace()`
    """

    def __init__(self):
        # nosigint=True is forwarded to pdb.Pdb unchanged.
        pdb.Pdb.__init__(self, nosigint=True)

    def _cmdloop(self):
        """Run the command loop with sys.stdin swapped for the cached stdin.

        The whole loop runs while holding ``_stdin_lock``; ``sys.stdin`` is
        restored to its previous value afterwards, even on error.
        """
        stdin_bak = sys.stdin
        with _stdin_lock:
            try:
                if _stdin_fd is not None:
                    # Open the captured descriptor once and reuse the file
                    # object on later calls.
                    if not _stdin[0]:
                        _stdin[0] = os.fdopen(_stdin_fd)
                    sys.stdin = _stdin[0]
                self.cmdloop()
            finally:
                sys.stdin = stdin_bak
43 | 
44 | 
def set_trace():
    """Start a MultiprocessingPdb session in the frame of the caller."""
    # f_back of this function's own frame is the frame that called set_trace.
    caller_frame = sys._getframe().f_back
    debugger = MultiprocessingPdb()
    debugger.set_trace(caller_frame)
48 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/tasks/translation_from_pretrained_xlm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq.data.legacy.masked_lm_dictionary import MaskedLMDictionary
 7 | from fairseq.tasks.translation import TranslationTask
 8 | 
 9 | from . import register_task
10 | 
11 | 
@register_task("translation_from_pretrained_xlm")
class TranslationFromPretrainedXLMTask(TranslationTask):
    """
    Same as TranslationTask except use the MaskedLMDictionary class so that
    we can load data that was binarized with the MaskedLMDictionary class.

    This task should be used for the entire training pipeline when we want to
    train an NMT model from a pretrained XLM checkpoint: binarizing NMT data,
    training NMT with the pretrained XLM checkpoint, and subsequent evaluation
    of that trained model.
    """

    @classmethod
    def load_dictionary(cls, filename):
        """Load the masked LM dictionary from the filename

        Args:
            filename (str): the filename

        Returns:
            whatever ``MaskedLMDictionary.load`` returns for ``filename``
        """
        dictionary = MaskedLMDictionary.load(filename)
        return dictionary
32 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq/tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import re
 7 | 
 8 | SPACE_NORMALIZER = re.compile(r"\s+")
 9 | 
10 | 
11 | def tokenize_line(line):
12 |     line = SPACE_NORMALIZER.sub(" ", line)
13 |     line = line.strip()
14 |     return line.split()
15 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/fairseq_cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webis-de/summary-workbench/c17d28b89bb43f43f99abc061c3c8a5c4eaf1971/summarizers/gsum/guided_summarization/bart/fairseq_cli/__init__.py


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3 -u
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from fairseq_cli.generate import cli_main
 8 | 
 9 | 
# Run the CLI entry point imported from fairseq_cli.generate, but only when
# this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    cli_main()
12 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/interactive.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3 -u
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from fairseq_cli.interactive import cli_main
 8 | 
 9 | 
# Run the CLI entry point imported from fairseq_cli.interactive, but only
# when this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    cli_main()
12 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/preprocess.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3 -u
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from fairseq_cli.preprocess import cli_main
 8 | 
 9 | 
# Run the CLI entry point imported from fairseq_cli.preprocess, but only
# when this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    cli_main()
12 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/requirements.txt:
--------------------------------------------------------------------------------
1 | PyTorch version >= 1.4.0
2 | Python version >= 3.6
3 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/score.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3 -u
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from fairseq_cli.score import cli_main
 8 | 
 9 | 
# Run the CLI entry point imported from fairseq_cli.score, but only when
# this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    cli_main()
12 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from fairseq.models.bart import BARTModel
 3 | 
 4 | import sys
 5 | 
 6 | bart = BARTModel.from_pretrained(
 7 |     sys.argv[3],
 8 |     checkpoint_file=sys.argv[4],
 9 |     data_name_or_path=sys.argv[5]
10 | )
11 | 
12 | bart.cuda()
13 | bart.eval()
14 | bart.half()
15 | count = 1
16 | bsz = 16
17 | with open(sys.argv[1]) as source, open(sys.argv[2], 'w') as fout:
18 |     sline = source.readline().strip()
19 |     slines = [sline]
20 |     for sline in source:
21 |         if count % bsz == 0:
22 |             with torch.no_grad():
23 |                 hypotheses_batch = bart.sample(slines, beam=4, lenpen=2.0, max_len_b=140, min_len=55, no_repeat_ngram_size=3)
24 | 
25 |             for hypothesis in hypotheses_batch:
26 |                 fout.write(hypothesis + '\n')
27 |                 fout.flush()
28 |             slines = []
29 | 
30 |         slines.append(sline.strip())
31 |         count += 1
32 |     if slines != []:
33 |         hypotheses_batch = bart.sample(slines, beam=4, lenpen=2.0, max_len_b=140, min_len=55, no_repeat_ngram_size=3)
34 |         for hypothesis in hypotheses_batch:
35 |             fout.write(hypothesis + '\n')
36 |             fout.flush()
37 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/train.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3 -u
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from fairseq_cli.train import cli_main
 8 | 
 9 | 
# Run the CLI entry point imported from fairseq_cli.train, but only when
# this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    cli_main()
12 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/validate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3 -u
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from fairseq_cli.validate import cli_main
 8 | 
 9 | 
# Run the CLI entry point imported from fairseq_cli.validate, but only when
# this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    cli_main()
12 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/z_bin.sh:
--------------------------------------------------------------------------------
 1 | BPE_DIR=/path/to/the/BPE_input/directory
 2 | BIN_DIR=/path/to/the/output/directory
 3 | python fairseq_cli/guided_preprocess.py \
 4 |   --source-lang "source" \
 5 |   --target-lang "target" \
 6 |   --trainpref $BPE_DIR"/train.bpe" \
 7 |   --validpref $BPE_DIR"/val.bpe" \
 8 |   --destdir $BIN_DIR \
 9 |   --workers 60 \
10 |   --srcdict dict.txt \
11 |   --tgtdict dict.txt;
12 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/z_bpe.sh:
--------------------------------------------------------------------------------
 1 | INPUT=/path/to/the/input/file
 2 | OUTPUT=/path/to/the/output/file
 3 | wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json'
 4 | wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe'
 5 | wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt'
 6 | python -m examples.roberta.multiprocessing_bpe_encoder \
 7 | --encoder-json encoder.json \
 8 | --vocab-bpe vocab.bpe \
 9 | --inputs "$INPUT" \
10 | --outputs "$OUTPUT" \
11 | --workers 60 \
12 | --keep-empty;
13 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/z_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from fairseq.models.bart import GuidedBARTModel
 3 | 
 4 | import sys
 5 | bart = GuidedBARTModel.from_pretrained(
 6 |     sys.argv[4],
 7 |     checkpoint_file=sys.argv[5],
 8 |     data_name_or_path=sys.argv[6]
 9 | )
10 | 
11 | bart.cuda()
12 | bart.eval()
13 | bart.half()
14 | count = 1
15 | bsz = 16
16 | 
17 | with open(sys.argv[1]) as source, open(sys.argv[2]) as zs, open(sys.argv[3], 'w') as fout:
18 |     sline = source.readline().strip()
19 |     slines = [sline]
20 |     zline = zs.readline().strip()
21 |     zlines = [zline]
22 |     for sline, zline in zip(source, zs):
23 |         if count % bsz == 0:
24 |             with torch.no_grad():
25 |                 hypotheses_batch = bart.sample(slines, zlines, beam=4, lenpen=2.0, max_len_b=140, min_len=55, no_repeat_ngram_size=3, guided=True)
26 | 
27 |             for hypothesis in hypotheses_batch:
28 |                 fout.write(hypothesis + '\n')
29 |                 fout.flush()
30 |             slines = []
31 |             zlines = []
32 | 
33 |         slines.append(sline.strip())
34 |         zlines.append(zline.strip())
35 |         count += 1
36 |     if slines != []:
37 |         hypotheses_batch = bart.sample(slines, zlines, beam=4, lenpen=2.0, max_len_b=140, min_len=55, no_repeat_ngram_size=3, guided=True)
38 |         for hypothesis in hypotheses_batch:
39 |             fout.write(hypothesis + '\n')
40 |             fout.flush()
41 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/z_test.sh:
--------------------------------------------------------------------------------
# Usage: z_test.sh SRC GUIDANCE RESULT_PATH MODEL_DIR MODEL_NAME DATA_BIN
# Forwards the six positional arguments to z_test.py in the same order.
SRC=$1
GUIDANCE=$2
RESULT_PATH=$3
MODEL_DIR=$4
MODEL_NAME=$5
DATA_BIN=$6
# Quote every expansion so paths containing spaces or glob characters reach
# z_test.py as single arguments in the expected positions.
python z_test.py "$SRC" "$GUIDANCE" "$RESULT_PATH" "$MODEL_DIR" "$MODEL_NAME" "$DATA_BIN"
8 | 


--------------------------------------------------------------------------------
/summarizers/gsum/guided_summarization/bart/z_train.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | TOTAL_NUM_UPDATES=20000  
 3 | WARMUP_UPDATES=500      
 4 | LR=3e-05
 5 | MAX_TOKENS=2048
 6 | UPDATE_FREQ=16
 7 | BART_PATH=/projects/tir5/users/pliu3/zdou/fairseq/bart.large/model.pt
 8 | DATA_BIN=$1
 9 | SAVE_DIR=$2
10 | 
11 | CUDA_VISIBLE_DEVICES=0,1 python train.py $DATA_BIN \
12 |     --restore-file $BART_PATH \
13 |     --max-tokens $MAX_TOKENS \
14 |     --task guided_translation \
15 |     --source-lang source --target-lang target \
16 |     --truncate-source \
17 |     --layernorm-embedding \
18 |     --share-all-embeddings \
19 |     --share-decoder-input-output-embed \
20 |     --reset-optimizer --reset-dataloader --reset-meters \
21 |     --required-batch-size-multiple 1 \
22 |     --arch guided_bart_large \
23 |     --criterion label_smoothed_cross_entropy \
24 |     --label-smoothing 0.1 \
25 |     --dropout 0.1 --attention-dropout 0.1 \
26 |     --weight-decay 0.01 --optimizer adam --adam-betas "(0.9, 0.999)" --adam-eps 1e-08 \
27 |     --clip-norm 0.1 \
28 |     --lr-scheduler polynomial_decay --lr $LR --total-num-update $TOTAL_NUM_UPDATES --warmup-updates $WARMUP_UPDATES \
29 |     --fp16 --update-freq $UPDATE_FREQ \
30 |     --skip-invalid-size-inputs-valid-test \
31 |     --save-dir $SAVE_DIR \
32 |     --find-unused-parameters;
33 | 


--------------------------------------------------------------------------------
/summarizers/gsum/summarizer.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | sys.path.insert(0, "./guided_summarization/bart")
 4 | from fairseq.models.bart.guided_model import GuidedBARTModel
 5 | from model_setup import DATA_PATH, SAVE_PATH
 6 | from pydantic import Field
 7 | 
# Directory handed to GuidedBARTModel.from_pretrained as the model location.
MODEL_PATH = SAVE_PATH
# Rebinds the DATA_PATH imported from model_setup to its "data" subdirectory.
DATA_PATH = DATA_PATH / "data"
10 | 
11 | 
class GuidedBART(object):
    """Wrapper around a pretrained GuidedBARTModel checkpoint ("bart_sentence.pt")."""

    def __init__(self):
        self.bart = GuidedBARTModel.from_pretrained(
            str(MODEL_PATH), "bart_sentence.pt", str(DATA_PATH)
        )
        if not self.bart:
            return
        print("Initialized GuidedBART.")
        self.bart.eval()

    def summarize(self, text, guidance, ratio=0.2):
        """Summarize a single ``text`` using the ``guidance`` string.

        ``ratio`` is accepted but not used by this method; the generation
        settings below are fixed. Returns the sampled outputs joined with
        single spaces.
        """
        generation_options = dict(
            beam=4,
            lenpen=2.0,
            max_len_b=140,
            min_len=55,
            no_repeat_ngram_size=3,
            guided=True,
        )
        # The model samples over batches, so wrap the single inputs in lists.
        hypotheses = self.bart.sample([text], [guidance], **generation_options)
        return " ".join(hypotheses)
34 | 
35 | 
class SummarizerPlugin:
    """Plugin entry point exposing GuidedBART through a batch ``summarize`` method."""

    def __init__(self):
        self.summarizer = GuidedBART()

    def summarize(self, batch, ratio, guidance: str = Field(..., min_length=1)):
        """Summarize every text in ``batch`` with the same ``guidance``.

        Returns one summary per input text, in input order.
        """
        summaries = []
        for text in batch:
            summaries.append(self.summarizer.summarize(text, guidance, ratio))
        return summaries
42 | 


--------------------------------------------------------------------------------
/summarizers/gsum/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "GSum-BART"
3 | metadata:
4 |   type: abstractive
5 |   homepage: https://aclanthology.org/2021.naacl-main.384/
6 |   sourcecode: https://github.com/neulab/guided_summarization
7 | 


--------------------------------------------------------------------------------
/summarizers/lobart/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | tqdm = "*"
 8 | requests = "*"
 9 | torch = "*"
10 | transformers = "==2.11.0"
11 | 
12 | [dev-packages]
13 | 
14 | [requires]
15 | python_version = "3.8"  # 3.9 and higher does not work
16 | 


--------------------------------------------------------------------------------
/summarizers/lobart/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "LoBART"
3 | metadata:
4 |   type: abstractive
5 |   homepage: https://aclanthology.org/2021.acl-long.470
6 |   sourcecode: https://github.com/potsawee/longsum0
7 | 


--------------------------------------------------------------------------------
/summarizers/longformer2roberta/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | transformers = "*"
10 | torch = "*"
11 | 
12 | [requires]
13 | python_version = "3.10"
14 | 


--------------------------------------------------------------------------------
/summarizers/longformer2roberta/model_setup.py:
--------------------------------------------------------------------------------
1 | from summarizer import SummarizerPlugin
2 | 
# When run as a script, construct the plugin once and discard it
# (presumably so that its model is fetched ahead of time -- confirm against
# the summarizer module).
if __name__ == "__main__":
    SummarizerPlugin()
5 | 


--------------------------------------------------------------------------------
/summarizers/longformer2roberta/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "Longformer2Roberta"
3 | metadata:
4 |   type: abstractive
5 | 


--------------------------------------------------------------------------------
/summarizers/neuralsum/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | transformers = "*"
10 | torch = "*"
11 | sentencepiece = "*"
12 | protobuf = "==3.20.*"
13 | 
14 | [requires]
15 | python_version = "3.10"
16 | 


--------------------------------------------------------------------------------
/summarizers/neuralsum/model_setup.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import logging
 3 | 
 4 | from summarizer import SummarizerPlugin
 5 | 
 6 | 
 7 | def setup():
 8 |     logger = logging.getLogger(inspect.currentframe().f_code.co_name)
 9 | 
10 |     logger.info("downloading model")
11 |     SummarizerPlugin()
12 |     logger.info("done")
13 | 
14 | 
# Script entry point: configure logging, then run the model setup.
if __name__ == "__main__":
    FORMAT = "{asctime} {levelname} [{name}] {message}"
    DATEFMT = "%H:%M:%S"
    # Set the level explicitly: with basicConfig's default the root logger
    # stays at WARNING, so the logger.info() progress messages emitted by
    # setup() would never be shown.
    logging.basicConfig(
        format=FORMAT, datefmt=DATEFMT, style="{", level=logging.INFO
    )
    setup()
20 | 


--------------------------------------------------------------------------------
/summarizers/neuralsum/summarizer/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .summarizer import NeuralSummarizer
 4 | 
 5 | 
 6 | class SummarizerPlugin:
 7 |     def __init__(self, *, model=None):
 8 |         self.model = model or os.environ["model"]
 9 |         self.summarizer = NeuralSummarizer(self.model)
10 | 
11 |     def summarize(self, batch, ratio, use_contrastive_search: bool = True):
12 |         return [
13 |             self.summarizer.summarize(
14 |                 text, ratio=ratio, use_contrastive_search=use_contrastive_search
15 |             )
16 |             for text in batch
17 |         ]
18 | 
19 |     def metadata(self):
20 |         return self.summarizer.metadata
21 | 


--------------------------------------------------------------------------------
/summarizers/neuralsum/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "{model}"
3 | metadata:
4 |   type: abstractive
5 | 


--------------------------------------------------------------------------------
/summarizers/newspaper3k/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | newspaper3k = "*"
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/summarizers/newspaper3k/model_setup.py:
--------------------------------------------------------------------------------
1 | import nltk
2 | 
def setup():
    """Download the NLTK "punkt" resource."""
    nltk.download("punkt")

# Run the download only when this file is executed as a script.
if __name__ == "__main__":
    setup()
8 | 


--------------------------------------------------------------------------------
/summarizers/newspaper3k/summarizer/__init__.py:
--------------------------------------------------------------------------------
 1 | from newspaper import nlp
 2 | 
 3 | 
 4 | def take_ratio(ranked_sents, ratio):
 5 |     ranked_sents = [(i, s.split()) for i, s in ranked_sents]
 6 |     num_tokens = sum(len(s) for _, s in ranked_sents)
 7 |     requested_tokens = round(ratio * num_tokens)
 8 |     token_count = 0
 9 |     taken_sents = []
10 |     for i, sent in ranked_sents:
11 |         prev_token_count = token_count
12 |         token_count += len(sent)
13 |         if taken_sents and (token_count - requested_tokens) > (
14 |             requested_tokens - prev_token_count
15 |         ):
16 |             break
17 |         taken_sents.append((i, sent))
18 |     taken_sents.sort(key=lambda x: x[0])
19 |     return [" ".join(s) for _, s in taken_sents]
20 | 
21 | 
class SummarizerPlugin:
    """Extractive summarizer built on the scoring helpers of ``newspaper.nlp``."""

    def __init__(self):
        """Nothing to initialise; the plugin keeps no state."""

    def _summarize(self, text, ratio):
        """Summarize one document.

        Sentences are scored with ``nlp.score`` (using an empty title-word
        list and the keywords extracted from ``text``), ordered from highest
        to lowest score, and then cut down to ``ratio`` by ``take_ratio``.

        Returns:
            The chosen sentences joined by single spaces.
        """
        sentences = nlp.split_sentences(text)
        keywords = nlp.keywords(text)

        scored = nlp.score(sentences, [], keywords)
        best_first = sorted(scored.items(), key=lambda item: item[1], reverse=True)
        # Drop the scores; take_ratio unpacks each remaining entry as an
        # (index, sentence) pair.
        ranked = [entry for entry, _score in best_first]
        return " ".join(take_ratio(ranked, ratio))

    def summarize(self, batch, ratio):
        """Summarize every text in ``batch``; returns one summary per text."""
        summaries = []
        for text in batch:
            summaries.append(self._summarize(text, ratio))
        return summaries
38 | 


--------------------------------------------------------------------------------
/summarizers/newspaper3k/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: Newspaper3k
3 | metadata:
4 |   type: extractive
5 |   homepage: https://newspaper.readthedocs.io/en/latest/
6 |   sourcecode: https://github.com/codelucas/newspaper/
7 | 


--------------------------------------------------------------------------------
/summarizers/pmisum/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | transformers = "*"
 8 | torch = "*"
 9 | nltk = "*"
10 | requests = "*"
11 | 
12 | [dev-packages]
13 | 
14 | [requires]
15 | python_version = "3.10"
16 | 


--------------------------------------------------------------------------------
/summarizers/pmisum/model_setup.py:
--------------------------------------------------------------------------------
 1 | import tarfile
 2 | from pathlib import Path
 3 | 
 4 | import nltk
 5 | import requests
 6 | 
# Gzipped tar archive that setup() streams and extracts.
CHECKPOINT_URL = "https://files.webis.de/summarization-models/pmisum/checkpoints.tar.gz"
# Extraction target; setup() skips the download when this path already exists.
SAVE_PATH = Path("~/.cache/pmisum").expanduser()
 9 | 
10 | 
def setup():
    """Fetch the model checkpoints and the NLTK ``punkt`` data.

    The checkpoint archive at ``CHECKPOINT_URL`` is streamed and extracted
    into ``SAVE_PATH``. The download is skipped when ``SAVE_PATH`` already
    exists. If anything goes wrong while downloading or extracting, the
    partially filled directory is removed again and the error is re-raised.

    Raises:
        Exception: whatever the download or extraction raised (including an
            HTTP error status reported by ``raise_for_status``).
    """
    # Local import: only the cleanup path below needs it.
    import shutil

    if not SAVE_PATH.exists():
        print("Downloading checkpoints...")
        SAVE_PATH.mkdir(parents=True, exist_ok=True)
        try:
            with requests.get(CHECKPOINT_URL, stream=True) as response:
                # Fail on 4xx/5xx instead of handing an error page to tarfile.
                response.raise_for_status()
                # NOTE(review): extractall() trusts the member paths in the
                # archive; acceptable only while CHECKPOINT_URL is trusted.
                with tarfile.open(fileobj=response.raw, mode="r|gz") as tar:
                    tar.extractall(path=SAVE_PATH)
            print("Downloaded and extracted checkpoint files.")
        except BaseException:
            # SAVE_PATH is a directory, so Path.unlink() cannot remove it (it
            # would raise and hide the real error). Remove the whole tree so
            # the existence check above triggers a fresh download next time.
            # BaseException also covers an interrupted download.
            print("An error occurred, removing path")
            shutil.rmtree(SAVE_PATH, ignore_errors=True)
            raise
    else:
        print("Checkpoints already exist")
    print("Initializing NLTK ...")
    nltk.download("punkt")
    print("Initialized NLTK")


if __name__ == "__main__":
    setup()
33 | 


--------------------------------------------------------------------------------
/summarizers/pmisum/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "PMISum"
3 | metadata:
4 |   type: extractive
5 |   homepage: https://aclanthology.org/2021.eacl-main.213/
6 |   sourcecode: https://github.com/vishakhpk/mi-unsup-summ
7 | 


--------------------------------------------------------------------------------
/summarizers/positionrank/model_setup.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import os
 3 | 
 4 | def setup():
 5 |     process = subprocess.run(['python', '-m', 'spacy', 'download', 'en_core_web_sm'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 6 | 
 7 | if __name__ == "__main__":
 8 |     setup()
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/summarizers/positionrank/requirements.txt:
--------------------------------------------------------------------------------
1 | pytextrank==3.2.3


--------------------------------------------------------------------------------
/summarizers/positionrank/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "Position Rank"
3 | metadata:
4 |   type: extractive
5 |   sourcecode: "https://github.com/DerwenAI/pytextrank"


--------------------------------------------------------------------------------
/summarizers/schnitsum/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | schnitsum = "*"
10 | 
11 | [requires]
12 | python_version = "3.10"
13 | 


--------------------------------------------------------------------------------
/summarizers/schnitsum/model_setup.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import logging
 3 | 
 4 | from summarizer import MODEL, SummarizerPlugin
 5 | 
 6 | 
 7 | def setup():
 8 |     logger = logging.getLogger(inspect.currentframe().f_code.co_name)
 9 | 
10 |     logger.info("downloading %s", MODEL)
11 |     SummarizerPlugin()
12 |     logger.info("done")
13 | 
14 | 
if __name__ == "__main__":
    FORMAT = "{asctime} {levelname} [{name}] {message}"
    DATEFMT = "%H:%M:%S"
    # Without an explicit level the root logger stays at WARNING, so the INFO
    # progress messages emitted by setup() would never be shown.
    logging.basicConfig(
        format=FORMAT, datefmt=DATEFMT, style="{", level=logging.INFO
    )
    setup()
20 | 


--------------------------------------------------------------------------------
/summarizers/schnitsum/summarizer.py:
--------------------------------------------------------------------------------
 1 | from os import environ
 2 | 
 3 | from schnitsum import SchnitSum
 4 | 
# Model name taken from the "model" environment variable; falls back to
# "BART" when the variable is unset or empty.
MODEL = environ.get("model") or "BART"

# Maps the supported model names to the identifiers handed to SchnitSum.
MODELS = {
    "BART": "sobamchan/bart-large-scitldr",
    "BART-65-shrinked": "sobamchan/bart-large-scitldr-distilled-3-3",
    "BART-37-shrinked": "sobamchan/bart-large-scitldr-distilled-12-3",
}
12 | 
13 | 
class SummarizerPlugin:
    """Summarizer plugin wrapping a ``SchnitSum`` model."""

    def __init__(self):
        """Load the model selected by the module-level ``MODEL`` name.

        Raises:
            KeyError: if ``MODEL`` is not one of the keys of ``MODELS``.
        """
        try:
            model_key = MODELS[MODEL]
        except KeyError:
            # Same exception type as before, but say which names are valid.
            raise KeyError(
                f"unknown model {MODEL!r}; expected one of {sorted(MODELS)}"
            ) from None
        self.model = SchnitSum(model_key)
        self.meta = {"model": model_key}

    def summarize(self, batch, ratio):
        """Summarize all texts of ``batch`` in a single model call.

        Args:
            batch: list of input texts.
            ratio: accepted for interface compatibility; not used here.

        Returns:
            Whatever the model returns for the batch; an empty list when the
            batch is empty.
        """
        # An empty batch would otherwise request batch_size=0 from the model.
        if not batch:
            return []
        return self.model(batch, batch_size=len(batch))

    def metadata(self):
        """Return the metadata dict that records the model identifier."""
        return self.meta
25 | 


--------------------------------------------------------------------------------
/summarizers/schnitsum/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "Schnitsum ({model})"
3 | metadata:
4 |   type: abstractive
5 |   sourcecode: https://github.com/sobamchan/schnitsum
6 | 


--------------------------------------------------------------------------------
/summarizers/textrank/model_setup.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import os
 3 | 
 4 | def setup():
 5 |     process = subprocess.run(['python', '-m', 'spacy', 'download', 'en_core_web_sm'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 6 | 
 7 | if __name__ == "__main__":
 8 |     setup()
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/summarizers/textrank/requirements.txt:
--------------------------------------------------------------------------------
1 | pytextrank==3.2.3


--------------------------------------------------------------------------------
/summarizers/textrank/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "TextRank"
3 | metadata:
4 |   type: extractive
5 |   sourcecode: "https://github.com/DerwenAI/pytextrank"


--------------------------------------------------------------------------------
/summarizers/topicrank/model_setup.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import os
 3 | 
 4 | def setup():
 5 |     process = subprocess.run(['python', '-m', 'spacy', 'download', 'en_core_web_sm'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 6 | 
 7 | if __name__ == "__main__":
 8 |     setup()
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/summarizers/topicrank/requirements.txt:
--------------------------------------------------------------------------------
1 | pytextrank==3.2.3


--------------------------------------------------------------------------------
/summarizers/topicrank/sw-plugin-config.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | name: "TopicRank"
3 | metadata:
4 |   type: extractive
5 |   sourcecode: "https://github.com/DerwenAI/pytextrank"


--------------------------------------------------------------------------------
/templates/docker/api.yaml:
--------------------------------------------------------------------------------
# Compose fragment describing the API service.
# NOTE(review): `api:` and `volumes:` sit at the top level, so this looks like
# a piece that is merged into a complete compose file elsewhere — confirm.
api:
  # Unpinned tag: the interpreter version follows whatever `latest` points to.
  image: python:latest
  working_dir: /app
  ports:
    # Published on the loopback interface only.
    - "127.0.0.1:5000:5000"
  volumes:
    # API sources are bind-mounted as the working directory.
    - ./api/:/app
    - ./plugin_config:/plugin_config
    # Named volume (declared below) mounted at /root.
    - api_root:/root
  environment:
    # Hostnames refer to the `mongo` and `grobid` services.
    - MONGODB_HOST=mongodb://mongo/app
    - GROBID_HOST=http://grobid:8070
  # Resolved relative to working_dir, i.e. the bind-mounted ./api/boot.sh.
  command: ./boot.sh

volumes:
  api_root:
17 | 


--------------------------------------------------------------------------------
/templates/docker/frontend.yaml:
--------------------------------------------------------------------------------
# Compose fragment describing the frontend service in development mode.
frontend:
  image: frontend:latest
  build:
    context: ./frontend/
    dockerfile: Dockerfile.dev
  working_dir: /app
  stdin_open: true
  ports:
    # Published on the loopback interface only.
    - "127.0.0.1:3000:3000"
  volumes:
    # Frontend sources are bind-mounted as the working directory.
    - ./frontend/:/app
    # A named volume (declared below) is mounted over /app/node_modules, so
    # that path is backed by the volume rather than by the bind mount.
    - frontend_node_modules:/app/node_modules
  environment:
    - NODE_ENV=development
  # Give the `node` user ownership of the node_modules volume, then install
  # dependencies and start the app.
  command: bash -c "chown -R node:node /app/node_modules && npm install && npm start"

volumes:
  frontend_node_modules:
19 | 


--------------------------------------------------------------------------------
/templates/docker/grobid.yaml:
--------------------------------------------------------------------------------
# Compose fragment describing the GROBID service; the image is built from the
# ./grobid directory and tagged grobid:latest.
grobid:
  image: grobid:latest
  build: ./grobid
4 | 


--------------------------------------------------------------------------------
/templates/docker/mongo.yaml:
--------------------------------------------------------------------------------
# Compose fragment describing the MongoDB service.
mongo:
  # Unpinned tag: the server version follows whatever `latest` points to.
  image: mongo:latest
  volumes:
    # Database files are kept in the named volume declared below.
    - mongodata:/data/db

volumes:
  mongodata:
8 | 


--------------------------------------------------------------------------------
/templates/kubernetes/basic/frontend.yaml:
--------------------------------------------------------------------------------
# Deployment template for the frontend.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: summary-workbench-frontend
  labels:
    app: summary-workbench
    tier: frontend
spec:
  selector:
    # Must match the pod template labels below.
    matchLabels:
      app: summary-workbench
      tier: frontend
  template:
    metadata:
      labels:
        app: summary-workbench
        tier: frontend
    spec:
      containers:
        - name: frontend
          # Left empty in the template — presumably filled in by the
          # deployment tooling before the manifest is applied; TODO confirm.
          image:
          imagePullPolicy: Always
          env: []
          ports:
            - name: frontend-port
              containerPort: 80
              protocol: TCP
          # The pod counts as ready once GET / on the named port succeeds.
          readinessProbe:
            httpGet:
              path: /
              port: frontend-port
            periodSeconds: 15
      restartPolicy: Always
---
# Service that forwards port 80 to the named container port of the pods
# selected by the app/tier labels.
apiVersion: v1
kind: Service
metadata:
  name: summary-workbench-frontend
spec:
  selector:
    app: summary-workbench
    tier: frontend
  ports:
    - port: 80
      targetPort: frontend-port
46 | 


--------------------------------------------------------------------------------
/templates/kubernetes/basic/grobid.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   name: summary-workbench-grobid
 5 |   labels:
 6 |     app: summary-workbench
 7 |     tier: grobid
 8 | spec:
 9 |   selector:
10 |     matchLabels:
11 |       app: summary-workbench
12 |       tier: grobid
13 |   template:
14 |     metadata:
15 |       labels:
16 |         app: summary-workbench
17 |         tier: grobid
18 |     spec:
19 |       containers:
20 |         - name: grobid
21 |           image: mongo:latest
22 |           ports:
23 |             - name: grobid-port
24 |               containerPort: 8070
25 |               protocol: TCP
26 |           readinessProbe:
27 |             httpGet:
28 |               path: /
29 |               port: 8070
30 |             periodSeconds: 15
31 |       restartPolicy: Always
32 | ---
33 | apiVersion: v1
34 | kind: Service
35 | metadata:
36 |   name: summary-workbench-grobid
37 | spec:
38 |   selector:
39 |     app: summary-workbench
40 |     tier: grobid
41 |   ports:
42 |     - port: 8070
43 |       targetPort: grobid-port
44 | 


--------------------------------------------------------------------------------
/templates/kubernetes/basic/ingress.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: networking.k8s.io/v1
 2 | kind: Ingress
 3 | metadata:
 4 |   name: summary-workbench-ingress
 5 |   annotations:
 6 |     nginx.ingress.kubernetes.io/client-max-body-size: 512m
 7 |     nginx.ingress.kubernetes.io/proxy-connect-timeout: '3000'
 8 |     nginx.ingress.kubernetes.io/proxy-send-timeout: '3000'
 9 |     nginx.ingress.kubernetes.io/proxy-read-timeout: '3000'
10 | spec:
11 |   ingressClassName: nginx
12 |   rules:
13 |     - host:
14 |       http:
15 |         paths:
16 |           - path: /
17 |             pathType: Prefix
18 |             backend:
19 |               service:
20 |                 name: summary-workbench
21 |                 port:
22 |                   number: 80
23 | 


--------------------------------------------------------------------------------
/templates/kubernetes/basic/mongodb.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   name: summary-workbench-mongodb
 5 |   labels:
 6 |     app: summary-workbench
 7 |     tier: mongodb
 8 | spec:
 9 |   selector:
10 |     matchLabels:
11 |       app: summary-workbench
12 |       tier: mongodb
13 |   template:
14 |     metadata:
15 |       labels:
16 |         app: summary-workbench
17 |         tier: mongodb
18 |     spec:
19 |       volumes:
20 |         - name: mongodata
21 |           persistentVolumeClaim:
22 |             claimName: summary-workbench-mongodb-data
23 |       containers:
24 |         - name: mongodb
25 |           image: mongo:latest
26 |           ports:
27 |             - name: mongodb-port
28 |               containerPort: 27017
29 |               protocol: TCP
30 |           volumeMounts:
31 |             - mountPath: /data/db
32 |               name: mongodata
33 |           readinessProbe:
34 |             httpGet:
35 |               path: /
36 |               port: 27017
37 |             periodSeconds: 15
38 |       restartPolicy: Always
39 | ---
40 | apiVersion: v1
41 | kind: Service
42 | metadata:
43 |   name: summary-workbench-mongodb
44 | spec:
45 |   selector:
46 |     app: summary-workbench
47 |     tier: mongodb
48 |   ports:
49 |     - port: 27017
50 |       targetPort: mongodb-port
51 | 


--------------------------------------------------------------------------------
/templates/kubernetes/plugin.yaml:
--------------------------------------------------------------------------------
# Deployment + Service skeleton for a plugin.
# Every empty value (name, tier, version, container name, image) is a blank
# left in the template — presumably filled in per plugin by the deployment
# tooling; TODO confirm.
apiVersion: apps/v1
kind: Deployment
metadata:
  name:
  labels:
    app: summary-workbench
    tier:
    version:
spec:
  selector:
    matchLabels:
      app: summary-workbench
      tier:
  template:
    metadata:
      labels:
        app: summary-workbench
        tier:
        version:
    spec:
      containers:
        - name:
          image:
          imagePullPolicy: Always
          ports:
            - name: defaultport
              containerPort: 5000
              protocol: TCP
          # The pod counts as ready once GET /health on the named port
          # answers within 5 seconds; checked every 15 seconds.
          readinessProbe:
            httpGet:
              path: /health
              port: defaultport
            periodSeconds: 15
            timeoutSeconds: 5
      restartPolicy: Always
---
# Service that forwards port 5000 to the named container port. Unlike the
# Deployment selector above, this selector also includes `version`.
apiVersion: v1
kind: Service
metadata:
  name:
spec:
  selector:
    app: summary-workbench
    tier:
    version:
  ports:
    - port: 5000
      targetPort: defaultport
49 | 


--------------------------------------------------------------------------------
/templates/kubernetes/token_secrets.yaml:
--------------------------------------------------------------------------------
# Opaque Secret template with two entries.
# Both values are left empty here — presumably populated by the deployment
# tooling before the manifest is applied; TODO confirm.
apiVersion: v1
kind: Secret
metadata:
  name: token-secrets
type: Opaque
# stringData takes plain (not base64-encoded) values.
stringData:
  access-token-secret:
  refresh-token-secret:
9 | 


--------------------------------------------------------------------------------
/templates/kubernetes/volumes.yaml:
--------------------------------------------------------------------------------
# Persistent volume claim; its name matches the claimName referenced by the
# MongoDB deployment template.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: summary-workbench-mongodb-data
  labels:
    app: summary-workbench
    tier: mongodb
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  # NOTE(review): hard-coded storage class; the target cluster must provide a
  # class with this name — confirm or make configurable.
  storageClassName: ceph-rbd
15 | 


--------------------------------------------------------------------------------
/version.json:
--------------------------------------------------------------------------------
1 | {
2 |   "api": "5.0.0",
3 |   "frontend": "5.0.1",
4 |   "grobid": "1.0.0"
5 | }
6 | 


--------------------------------------------------------------------------------