├── .gitignore ├── LICENSE ├── README.md ├── align.py ├── components.py ├── examples ├── wikinews │ ├── README.md │ ├── load.sh │ ├── wikinews.cache │ │ ├── meta.yaml │ │ ├── mgr │ │ │ ├── blocks │ │ │ │ └── 140645734304016 │ │ │ │ │ ├── data.feather │ │ │ │ │ └── meta.yaml │ │ │ ├── columns │ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:bart-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:bart-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:reference │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:bart-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:bart-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:pegasus-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── 
meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:pegasus-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:reference │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:reference │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── 
StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum │ │ │ │ │ ├── data.dill │ │ │ │ │ ├── meta.yaml │ │ │ │ │ └── state.dill │ │ │ │ ├── document │ │ │ │ │ └── state.dill │ │ │ │ ├── preprocessed_document │ │ │ │ │ └── state.dill │ │ │ │ ├── preprocessed_summary:bart-cnndm │ │ │ │ │ └── state.dill │ │ │ │ ├── preprocessed_summary:bart-xsum │ │ │ │ │ └── state.dill │ │ │ │ ├── preprocessed_summary:pegasus-cnndm │ │ │ │ │ └── state.dill │ │ │ │ ├── preprocessed_summary:pegasus-xsum │ │ │ │ │ └── state.dill │ │ │ │ ├── preprocessed_summary:reference │ │ │ │ │ └── state.dill │ │ │ │ ├── spacy:document │ │ │ │ │ ├── data.spacy │ │ │ │ │ └── meta.yaml │ │ │ │ ├── spacy:summary:bart-cnndm │ │ │ │ │ ├── data.spacy │ │ │ │ │ └── meta.yaml │ │ │ │ ├── spacy:summary:bart-xsum │ │ │ │ │ ├── data.spacy │ │ │ │ │ └── meta.yaml │ │ │ │ ├── spacy:summary:pegasus-cnndm │ │ │ │ │ ├── data.spacy │ │ │ │ │ └── meta.yaml │ │ │ │ ├── spacy:summary:pegasus-xsum │ │ │ │ │ ├── data.spacy │ │ │ │ │ └── meta.yaml │ │ │ │ ├── spacy:summary:reference │ │ │ │ │ ├── data.spacy │ │ │ │ │ └── meta.yaml │ │ │ │ ├── summary:bart-cnndm │ │ │ │ │ └── state.dill │ │ │ │ ├── summary:bart-xsum │ │ │ │ │ └── state.dill │ │ │ │ ├── summary:pegasus-cnndm │ │ │ │ │ └── state.dill │ │ │ │ ├── summary:pegasus-xsum │ │ │ │ │ └── state.dill │ │ │ │ └── summary:reference │ │ │ │ │ └── state.dill │ │ │ └── meta.yaml │ │ └── state.dill │ └── wikinews.jsonl └── xsum │ └── load.sh ├── generation.py ├── join.py ├── preprocessing.py ├── quickstart.sh ├── requirements.txt ├── resources ├── jquery.color-2.1.2.min.js ├── summvis.css └── summvis.js ├── summvis.py ├── utils.py └── website ├── annotations.png ├── demo.gif ├── main-vis.jpg ├── title.png └── triangle.png /.gitignore: 
-------------------------------------------------------------------------------- 1 | .DS_STORE 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 SummVis 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SummVis 2 | 3 | SummVis is an open-source visualization tool that supports fine-grained analysis of summarization models, data, and evaluation 4 | metrics. Through its lexical and semantic visualizations, SummVis enables in-depth exploration across important dimensions such as factual consistency and abstractiveness. 5 | 6 | Authors: [Jesse Vig](https://twitter.com/jesse_vig)1, 7 | [Wojciech Kryściński](https://twitter.com/iam_wkr)1, 8 | [Karan Goel](https://twitter.com/krandiash)2, 9 | [Nazneen Fatema Rajani](https://twitter.com/nazneenrajani)1
10 | 1[Salesforce Research](https://einstein.ai/) 2[Stanford Hazy Research](https://hazyresearch.stanford.edu/) 11 | 12 | 📖 [Paper](https://arxiv.org/abs/2104.07605) 13 | 🎥 [Demo](https://vimeo.com/540429745) 14 | 15 |

16 | Demo gif 17 |

18 | 19 | _We welcome issues for questions, suggestions, requests or bug reports._ 20 | 21 | ## Table of Contents 22 | - [User guide](#user-guide) 23 | - [Installation](#installation) 24 | - [Quickstart](#quickstart) 25 | - [Load data into SummVis](#loading-data-into-summvis) 26 | - [Deploying SummVis remotely](#deploying-summvis-remotely) 27 | - [Citation](#citation) 28 | - [Acknowledgements](#acknowledgements) 29 | 30 | ## User guide 31 | 32 | ### Overview 33 | SummVis is a tool for analyzing abstractive summarization systems. It provides fine-grained insights on summarization 34 | models, data, and evaluation metrics by visualizing the relationships between source documents, reference summaries, 35 | and generated summaries, as illustrated in the figure below.
36 | 37 | ![Relations between source, reference, and generated summaries](website/triangle.png) 38 | 39 | ### Interface 40 | 41 | The SummVis interface is shown below. The example displayed is the first record from the 42 | [CNN / Daily Mail](https://huggingface.co/datasets/cnn_dailymail) validation set. 43 | 44 | ![Main interface](website/main-vis.jpg) 45 | 46 | 47 | #### Components 48 | 49 | **(a)** Configuration panel
50 | **(b)** Source document (or reference summary, depending on configuration)
51 | **(c)** Generated summaries (and/or reference summary, depending on configuration)
52 | **(d)** Scroll bar with global view of annotations
53 | 54 | #### Annotations 55 | Annotations 56 | 57 | **N-gram overlap:** Word sequences that overlap between the document on the left and 58 | the selected summary on the right. Underlines are color-coded by index of summary sentence.
59 | **Semantic overlap**: Words in the summary that are semantically close to one or more words in document on the left.
60 | **Novel words**: Words in the summary that do not appear in the document on the left.
61 | **Novel entities**: Entity words in the summary that do not appear in the document on the left.
62 | 63 | ### Limitations 64 | Currently only English text is supported. Extremely long documents may render slowly in the tool. 65 | 66 | ## Installation 67 | ```shell 68 | git clone https://github.com/robustness-gym/summvis.git 69 | cd summvis 70 | # Following line necessary to get pip > 21.3 71 | pip install --upgrade pip 72 | pip install -r requirements.txt 73 | ``` 74 | 75 | ## Quickstart 76 | 77 | View an example from [WikiNews](examples/wikinews/README.md): 78 | 79 | ```shell 80 | streamlit run summvis.py -- --path examples/wikinews/wikinews.cache 81 | ``` 82 | 83 | 84 | ## Loading data into SummVis 85 | 86 | ### If you have generated summaries: 87 | 88 | The following steps describe how to load source documents and associated precomputed summaries into the SummVis tool. 89 | 90 | **1. Download spaCy model** 91 | ``` 92 | python -m spacy download en_core_web_lg 93 | ``` 94 | This may take several minutes. 95 | 96 | **2. Create .jsonl file with the source document, reference summary and/or generated summaries in the following format:** 97 | 98 | ``` 99 | {"document": "This is the first source document", "summary:reference": "This is the reference summary", "summary:testmodel1": "This is the summary for testmodel1", "summary:testmodel2": "This is the summary for testmodel2"} 100 | {"document": "This is the second source document", "summary:reference": "This is the reference summary", "summary:testmodel1": "This is the summary for testmodel1", "summary:testmodel2": "This is the summary for testmodel2"} 101 | ``` 102 | 103 | The key for the reference summary must equal `summary:reference` and the key for any other summary must be of the form 104 | `summary:`, e.g. `summary:BART`. The document and at least one summary (reference, other, or both) are required. 105 | 106 | We also provide [scripts to generate summaries](#if-you-do-not-have-generated-summaries) if you haven't done so already. 107 | 108 | **3. 
Preprocess .jsonl file** 109 | 110 | Run `preprocessing.py` to precompute all data required in the interface (running `spaCy`, lexical and semantic 111 | aligners) and save a cache file, which can be read directly into the tool. Note that this script may take some time to run 112 | (~5-15 seconds per example on a MacBook Pro for 113 | documents of typical length found in CNN/DailyMail or XSum), so you may want to start with a small subset of your dataset 114 | using the `--n_samples` argument (below). This will also be expedited by running on a GPU. 115 | 116 | ```shell 117 | python preprocessing.py \ 118 | --workflow \ 119 | --dataset_jsonl path/to/my_dataset.jsonl \ 120 | --processed_dataset_path path/to/my_cache_file 121 | ``` 122 | 123 | Additional options: 124 | `--n_samples `: Process the first `number_of_samples` samples only (recommended). 125 | `--no_clean`: Do not perform additional text cleaning that may remove newlines, etc. 126 | 127 | **4. Launch Streamlit app** 128 | 129 | ```shell 130 | streamlit run summvis.py -- --path path/to/my_cache_file_or_parent_directory 131 | ``` 132 | 133 | Note that the additional `--` is not a mistake, and is required to pass command-line arguments in Streamlit. 134 | 135 | ### If you do NOT have generated summaries: 136 | 137 | Before running the steps above, you may run the additional steps below to generate summaries. You may also refer to the [sample 138 | end-to-end loading scripts](examples/) for [WikiNews](examples/wikinews/load.sh) (loaded from .jsonl file) and [XSum](examples/xsum/load.sh) 139 | (loaded from HuggingFace Datasets). 140 | 141 | **1. 
Create file with the source documents and optional reference summaries in the following format:** 142 | 143 | ``` 144 | {"document": "This is the first source document", "summary:reference": "This is the reference summary"} 145 | {"document": "This is the second source document", "summary:reference": "This is the reference summary"} 146 | ``` 147 | 148 | You may create a .jsonl format directly from a Huggingface dataset by running `preprocessing.py` with the `--standardize` flag: 149 | 150 | ```shell 151 | python preprocessing.py \ 152 | --standardize \ 153 | --dataset hf_dataset_name \ 154 | --version hf_dataset_version (optional) \ 155 | --split hf_dataset_split \ 156 | --save_jsonl_path path/to/save_jsonl_file 157 | ``` 158 | 159 | **2. Generate predictions** 160 | 161 | To use one of the **6 standard models** (`bart-xsum`, `bart-cnndm`, `pegasus-xsum`, `pegasus-cnndm`, `pegasus-newsroom`, 162 | `pegasus-multinews`): 163 | ```shell 164 | python generation.py --model model_abbrev --data_path path/to/jsonl_file 165 | ``` 166 | where `model` is one of the above 6 model codes. 167 | 168 | To use an **any Huggingface model**: 169 | ```shell 170 | python generation.py --model_name_or_path model_name_or_path --data_path path/to/jsonl_file 171 | ``` 172 | where `model_name_or_path` is the name of a Huggingface model or a local path. 173 | 174 | Either of the above two commands will generate a prediction file named `..predictions` 175 | 176 | **3. Join one or more prediction files (from previous step) with original dataset** 177 | 178 | ```shell 179 | python join.py \ 180 | --data_path path/to/jsonl_file \ 181 | --generation_paths \ 182 | path/to/prediction_file_1 \ 183 | path/to/prediction_file_2 \ 184 | --output_path path/to/save_jsonl_file 185 | ``` 186 | 187 | Once you complete these steps, you may proceed with the [final steps](#if-you-have-already-generated-summaries) to load your file into SummVis. 
188 | 189 | ## Deploying SummVis remotely 190 | 191 | See these tutorials on deploying a Streamlit app to various cloud services (from [Streamlit docs](https://docs.streamlit.io/en/stable/streamlit_faq.html)): 192 | 193 | * [How to Deploy Streamlit to a Free Amazon EC2 instance](https://towardsdatascience.com/how-to-deploy-a-streamlit-app-using-an-amazon-free-ec2-instance-416a41f69dc3), by Rahul Agarwal 194 | * [Host Streamlit on Heroku](https://towardsdatascience.com/quickly-build-and-deploy-an-application-with-streamlit-988ca08c7e83), by Maarten Grootendorst 195 | * [Host Streamlit on Azure](https://towardsdatascience.com/deploying-a-streamlit-web-app-with-azure-app-service-1f09a2159743), by Richard Peterson 196 | * [Host Streamlit on 21YunBox](https://www.21yunbox.com/docs/#/deploy-streamlit), by Toby Lei 197 | 198 | ## Citation 199 | 200 | When referencing this repository, please cite [this paper](https://arxiv.org/abs/2104.07605): 201 | 202 | ``` 203 | @misc{vig2021summvis, 204 | title={SummVis: Interactive Visual Analysis of Models, Data, and Evaluation for Text Summarization}, 205 | author={Jesse Vig and Wojciech Kry{\'s}ci{\'n}ski and Karan Goel and Nazneen Fatema Rajani}, 206 | year={2021}, 207 | eprint={2104.07605}, 208 | archivePrefix={arXiv}, 209 | primaryClass={cs.CL}, 210 | url={https://arxiv.org/abs/2104.07605} 211 | } 212 | ``` 213 | 214 | ## Acknowledgements 215 | 216 | We thank [Michael Correll](http://correll.io) for his valuable feedback. 
217 | 218 | 219 | -------------------------------------------------------------------------------- /align.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | import itertools 3 | from abc import ABC, abstractmethod 4 | from collections import defaultdict 5 | from operator import itemgetter 6 | from typing import List, Dict, Tuple 7 | from typing import Sequence 8 | from abc import ABC 9 | 10 | import numpy as np 11 | import torch 12 | from bert_score import BERTScorer 13 | from nltk import PorterStemmer 14 | from spacy.tokens import Doc, Span 15 | from toolz import itertoolz 16 | from transformers import AutoTokenizer 17 | from transformers.tokenization_utils_base import PaddingStrategy 18 | 19 | 20 | class EmbeddingModel(ABC): 21 | @abstractmethod 22 | def embed( 23 | self, 24 | sents: List[Span] 25 | ): 26 | pass 27 | 28 | 29 | class ContextualEmbedding(EmbeddingModel): 30 | 31 | def __init__(self, model, tokenizer_name, max_length, batch_size=32): 32 | self.model = model 33 | self.tokenizer = SpacyHuggingfaceTokenizer(tokenizer_name, max_length) 34 | self._device = model.device 35 | self.batch_size = batch_size 36 | 37 | def embed( 38 | self, 39 | sents: List[Span] 40 | ): 41 | spacy_embs_list = [] 42 | for start_idx in range(0, len(sents), self.batch_size): 43 | batch = sents[start_idx: start_idx + self.batch_size] 44 | encoded_input, special_tokens_masks, token_alignments = self.tokenizer.batch_encode(batch) 45 | encoded_input = {k: v.to(self._device) for k, v in encoded_input.items()} 46 | with torch.no_grad(): 47 | model_output = self.model(**encoded_input) 48 | embeddings = model_output[0].cpu() 49 | for embs, mask, token_alignment \ 50 | in zip(embeddings, special_tokens_masks, token_alignments): 51 | mask = torch.tensor(mask) 52 | embs = embs[mask == 0] # Filter embeddings at special token positions 53 | spacy_embs = [] 54 | for hf_idxs in token_alignment: 55 | if hf_idxs is None: 56 | pooled_embs = 
class StaticEmbedding(EmbeddingModel):
    """Embedding model backed by spaCy's static word vectors."""

    def embed(
        self,
        sents: List[Span]
    ):
        """Return one (num_tokens, dim) array of unit-norm vectors per sentence.

        A zero-vector token is left as-is (the norm falls back to 1 to avoid
        division by zero).
        """
        return [
            np.stack([t.vector / (t.vector_norm or 1) for t in sent])
            for sent in sents
        ]


class Aligner(ABC):
    @abstractmethod
    def align(
        self,
        source: Doc,
        targets: Sequence[Doc]
    ) -> List[Dict]:
        """Compute alignment from summary tokens to doc tokens

        Args:
            source: Source spaCy document
            targets: Target spaCy documents
        Returns: List of alignments, one for each target document
        """


class EmbeddingAligner(Aligner):
    """Aligns each target token to its most similar source tokens using
    cosine similarity of token embeddings."""

    def __init__(
        self,
        embedding: EmbeddingModel,
        threshold: float,
        top_k: int,
        baseline_val=0
    ):
        self.threshold = threshold  # Minimum rescaled similarity for a match to be kept
        self.top_k = top_k  # Max number of source matches kept per target token
        self.embedding = embedding
        self.baseline_val = baseline_val  # Baseline similarity subtracted before rescaling

    def align(
        self,
        source: Doc,
        targets: Sequence[Doc]
    ) -> List[Dict]:
        """Compute alignment from summary tokens to doc tokens with greatest semantic similarity

        Args:
            source: Source spaCy document
            targets: Target spaCy documents
        Returns: List of alignments, one for each target document
        """
        if len(source) == 0:
            return [{} for _ in targets]
        # Embed all sentences (source first, then each target) in one batch
        all_sents = list(source.sents) + list(itertools.chain.from_iterable(target.sents for target in targets))
        chunk_sizes = [_iter_len(source.sents)] + \
                      [_iter_len(target.sents) for target in targets]
        all_sents_token_embeddings = self.embedding.embed(all_sents)
        chunked_sents_token_embeddings = _split(all_sents_token_embeddings, chunk_sizes)
        source_sent_token_embeddings = chunked_sents_token_embeddings[0]
        source_token_embeddings = np.concatenate(source_sent_token_embeddings)
        # Zero out stopwords/punctuation so they cannot exceed the similarity threshold
        for token_idx, token in enumerate(source):
            if token.is_stop or token.is_punct:
                source_token_embeddings[token_idx] = 0
        alignments = []
        for i, target in enumerate(targets):
            target_sent_token_embeddings = chunked_sents_token_embeddings[i + 1]
            target_token_embeddings = np.concatenate(target_sent_token_embeddings)
            for token_idx, token in enumerate(target):
                if token.is_stop or token.is_punct:
                    target_token_embeddings[token_idx] = 0
            alignment = defaultdict(list)
            for score, target_idx, source_idx in self._emb_sim_sparse(
                target_token_embeddings,
                source_token_embeddings,
            ):
                alignment[target_idx].append((source_idx, score))
            # TODO used argpartition to get nlargest
            for j in list(alignment):
                alignment[j] = heapq.nlargest(self.top_k, alignment[j], itemgetter(1))
            alignments.append(alignment)
        return alignments

    def _emb_sim_sparse(self, embs_1, embs_2):
        """Return (score, idx_1, idx_2) triples for every pair of rows whose
        rescaled similarity exceeds the threshold."""
        sim = embs_1 @ embs_2.T
        # Rescale similarities relative to the baseline value
        sim = (sim - self.baseline_val) / (1 - self.baseline_val)
        keep = sim > self.threshold
        keep_idxs_1, keep_idxs_2 = np.where(keep)
        keep_scores = sim[keep]
        return list(zip(keep_scores, keep_idxs_1, keep_idxs_2))


class BertscoreAligner(EmbeddingAligner):
    """EmbeddingAligner using BERTScore contextual embeddings (roberta-large)."""

    def __init__(
        self,
        threshold,
        top_k
    ):
        scorer = BERTScorer(lang="en", rescale_with_baseline=True)
        model = scorer._model
        embedding = ContextualEmbedding(model, "roberta-large", 510)
        # Third entry of BERTScore's baseline values (F1 baseline) is used for rescaling
        baseline_val = scorer.baseline_vals[2].item()

        super().__init__(
            embedding, threshold, top_k, baseline_val
        )


class StaticEmbeddingAligner(EmbeddingAligner):
    """EmbeddingAligner using spaCy's static word vectors (no baseline rescaling)."""

    def __init__(
        self,
        threshold,
        top_k
    ):
        embedding = StaticEmbedding()
        super().__init__(
            embedding, threshold, top_k
        )


class NGramAligner(Aligner):
    """Aligns exact matches of stemmed, lowercased n-grams between documents."""

    def __init__(self):
        self.stemmer = PorterStemmer()

    def align(
        self,
        source: Doc,
        targets: List[Doc],
    ) -> List[Dict]:
        """Align each target to the source by matching normalized n-grams."""
        alignments = []
        source_ngram_spans = self._get_ngram_spans(source)
        for target in targets:
            target_ngram_spans = self._get_ngram_spans(target)
            alignments.append(
                self._align_ngrams(target_ngram_spans, source_ngram_spans)
            )
        return alignments

    def _get_ngram_spans(
        self,
        doc: Doc,
    ):
        """Map each stemmed, lowercased n-gram to its (start, end) token spans.

        Stopwords and punctuation are filtered out before n-grams are formed,
        so a span's interior may skip over filtered tokens.
        """
        ngrams = []
        for sent in doc.sents:
            # Filter once per sentence: the token list is invariant across n
            # (previously recomputed inside the n loop).
            tokens = [t for t in sent if not (t.is_stop or t.is_punct)]
            for n in range(1, len(sent)):
                ngrams.extend(_ngrams(tokens, n))

        def ngram_key(ngram):
            # Normalize each token by stemming and lowercasing
            return tuple(self.stemmer.stem(token.text).lower() for token in ngram)

        key_to_ngrams = itertoolz.groupby(ngram_key, ngrams)
        key_to_spans = {}
        for k, grouped_ngrams in key_to_ngrams.items():
            key_to_spans[k] = [
                (ngram[0].i, ngram[-1].i + 1)
                for ngram in grouped_ngrams
            ]
        return key_to_spans

    def _align_ngrams(
        self,
        ngram_spans_1: Dict[Tuple[str], List[Tuple[int, int]]],
        ngram_spans_2: Dict[Tuple[str], List[Tuple[int, int]]]
    ) -> Dict[Tuple[int, int], List[Tuple[int, int]]]:
        """Align ngram spans between two documents

        Args:
            ngram_spans_1: Map from (normalized_token1, normalized_token2, ...) n-gram tuple
                to a list of token spans of format (start_pos, end_pos)
            ngram_spans_2: Same format as above, but for second text
        Returns: map from each (start, end) span in text 1 to list of aligned (start, end) spans in text 2
        """
        if not ngram_spans_1 or not ngram_spans_2:
            return {}
        max_span_end_1 = max(span[1] for span in itertools.chain.from_iterable(ngram_spans_1.values()))
        # Tracks which token positions in text 1 are not yet claimed by a (longer) n-gram
        token_is_available_1 = [True] * max_span_end_1
        matched_keys = list(set(ngram_spans_1.keys()) & set(ngram_spans_2.keys()))  # Matched normalized ngrams between texts
        matched_keys.sort(key=len, reverse=True)  # Process n-grams from longest to shortest

        alignment = defaultdict(list)  # Map from each matched span in text 1 to list of aligned spans in text 2
        for key in matched_keys:
            spans_1 = ngram_spans_1[key]
            spans_2 = ngram_spans_2[key]
            available_spans_1 = [span for span in spans_1 if all(token_is_available_1[slice(*span)])]
            matched_spans_1 = []
            if available_spans_1 and spans_2:
                # if ngram can be matched to available spans in both sequences
                for span in available_spans_1:
                    # It's possible that these newly matched spans may be overlapping with one another, so
                    # check that token positions still available (only one span allowed per token in text 1):
                    if all(token_is_available_1[slice(*span)]):
                        matched_spans_1.append(span)
                        token_is_available_1[slice(*span)] = [False] * (span[1] - span[0])
            for span1 in matched_spans_1:
                alignment[span1] = spans_2

        return alignment
Tokenize each sentence and special tokens. 286 | for sent in sents: 287 | hf_tokens, token_alignment = self.tokenize(sent) 288 | token_alignments.append(token_alignment) 289 | token_ids = self.tokenizer.convert_tokens_to_ids(hf_tokens) 290 | encoding = self.tokenizer.prepare_for_model( 291 | token_ids, 292 | add_special_tokens=True, 293 | padding=False, 294 | ) 295 | token_ids_list.append(encoding['input_ids']) 296 | 297 | # Add padding 298 | max_length = max(map(len, token_ids_list)) 299 | attention_mask = [] 300 | input_ids = [] 301 | special_tokens_masks = [] 302 | for token_ids in token_ids_list: 303 | encoding = self.tokenizer.prepare_for_model( 304 | token_ids, 305 | padding=PaddingStrategy.MAX_LENGTH, 306 | max_length=max_length, 307 | add_special_tokens=False 308 | ) 309 | input_ids.append(encoding['input_ids']) 310 | attention_mask.append(encoding['attention_mask']) 311 | special_tokens_masks.append( 312 | self.tokenizer.get_special_tokens_mask( 313 | encoding['input_ids'], 314 | already_has_special_tokens=True 315 | ) 316 | ) 317 | 318 | encoded = { 319 | 'input_ids': torch.tensor(input_ids), 320 | 'attention_mask': torch.tensor(attention_mask) 321 | } 322 | return encoded, special_tokens_masks, token_alignments 323 | 324 | def tokenize( 325 | self, 326 | sent 327 | ): 328 | """Convert spacy sentence to huggingface tokens and compute the alignment""" 329 | hf_tokens = [] 330 | token_alignment = [] 331 | for i, token in enumerate(sent): 332 | # "Tokenize" each word individually, so as to track the alignment between spaCy/HF tokens 333 | # Prefix all tokens with a space except the first one in the sentence 334 | if i == 0: 335 | token_text = token.text 336 | else: 337 | token_text = ' ' + token.text 338 | start_hf_idx = len(hf_tokens) 339 | word_tokens = self.tokenizer.tokenize(token_text) 340 | end_hf_idx = len(hf_tokens) + len(word_tokens) 341 | if end_hf_idx < self.max_length: 342 | hf_tokens.extend(word_tokens) 343 | hf_idxs = list(range(start_hf_idx, 
end_hf_idx)) 344 | else: 345 | hf_idxs = None 346 | token_alignment.append(hf_idxs) 347 | return hf_tokens, token_alignment 348 | 349 | 350 | def _split(data, sizes): 351 | it = iter(data) 352 | return [[next(it) for _ in range(size)] for size in sizes] 353 | 354 | 355 | def _iter_len(it): 356 | return sum(1 for _ in it) 357 | 358 | # TODO set up batching 359 | # To get top K axis and value per row: https://stackoverflow.com/questions/42832711/using-np-argpartition-to-index-values-in-a-multidimensional-array 360 | 361 | 362 | def _ngrams(tokens, n): 363 | for i in range(len(tokens) - n + 1): 364 | yield tokens[i:i + n] 365 | -------------------------------------------------------------------------------- /components.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import count 3 | from operator import itemgetter 4 | from pathlib import Path 5 | from typing import Dict, Optional 6 | from typing import List, Tuple, Union 7 | 8 | import htbuilder 9 | import streamlit as st 10 | from htbuilder import span, div, script, style, link, styles, HtmlElement, br 11 | from htbuilder.units import px 12 | from spacy.tokens import Doc 13 | 14 | palette = [ 15 | "#66c2a5", 16 | "#fc8d62", 17 | "#8da0cb", 18 | "#e78ac3", 19 | "#a6d854", 20 | "#ffd92f", 21 | "#e5c494", 22 | "#b3b3b3", 23 | ] 24 | inactive_color = "#BBB" 25 | 26 | 27 | def local_stylesheet(path): 28 | with open(path) as f: 29 | css = f.read() 30 | return style()( 31 | css 32 | ) 33 | 34 | 35 | def remote_stylesheet(url): 36 | return link( 37 | href=url 38 | ) 39 | 40 | 41 | def local_script(path): 42 | with open(path) as f: 43 | code = f.read() 44 | return script()( 45 | code 46 | ) 47 | 48 | 49 | def remote_script(url): 50 | return script( 51 | src=url 52 | ) 53 | 54 | 55 | def get_color(sent_idx): 56 | return palette[sent_idx % len(palette)] 57 | 58 | 59 | def hex_to_rgb(hex): 60 | hex = hex.replace("#", '') 61 | return 
tuple(int(hex[i:i + 2], 16) for i in (0, 2, 4)) 62 | 63 | 64 | def color_with_opacity(hex_color, opacity): 65 | rgb = hex_to_rgb(hex_color) 66 | return f"rgba({rgb[0]},{rgb[1]},{rgb[2]},{opacity:.2f})" 67 | 68 | 69 | class Component: 70 | 71 | def show(self, width=None, height=None, scrolling=True, **kwargs): 72 | out = div(style=styles( 73 | **kwargs 74 | ))(self.html()) 75 | html = str(out) 76 | st.components.v1.html(html, width=width, height=height, scrolling=scrolling) 77 | 78 | def html(self): 79 | raise NotImplemented 80 | 81 | 82 | class MainView(Component): 83 | 84 | def __init__( 85 | self, 86 | document: Doc, 87 | summaries: List[Doc], 88 | semantic_alignments: Optional[List[Dict]], 89 | lexical_alignments: Optional[List[Dict]], 90 | layout: str, 91 | scroll: bool, 92 | gray_out_stopwords: bool 93 | ): 94 | self.document = document 95 | self.summaries = summaries 96 | self.semantic_alignments = semantic_alignments 97 | self.lexical_alignments = lexical_alignments 98 | self.layout = layout 99 | self.scroll = scroll 100 | self.gray_out_stopwords = gray_out_stopwords 101 | 102 | def html(self): 103 | 104 | # Add document elements 105 | if self.document._.name == 'Document': 106 | document_name = 'Source Document' 107 | else: 108 | document_name = self.document._.name + ' summary' 109 | doc_header = div( 110 | id_="document-header" 111 | )( 112 | document_name 113 | ) 114 | doc_elements = [] 115 | 116 | # Add document content, which comprises multiple elements, one for each summary. Only the elment corresponding to 117 | # selected summary will be visible. 
118 | 119 | mu = MultiUnderline() 120 | 121 | for summary_idx, summary in enumerate(self.summaries): 122 | token_idx_to_sent_idx = {} 123 | for sent_idx, sent in enumerate(summary.sents): 124 | for token in sent: 125 | token_idx_to_sent_idx[token.i] = sent_idx 126 | is_selected_summary = (summary_idx == 0) # By default, first summary is selected 127 | 128 | if self.semantic_alignments is not None: 129 | doc_token_idx_to_matches = defaultdict(list) 130 | semantic_alignment = self.semantic_alignments[summary_idx] 131 | for summary_token_idx, matches in semantic_alignment.items(): 132 | for doc_token_idx, sim in matches: 133 | doc_token_idx_to_matches[doc_token_idx].append((summary_token_idx, sim)) 134 | else: 135 | doc_token_idx_to_matches = {} 136 | 137 | token_elements = [] 138 | for doc_token_idx, doc_token in enumerate(self.document): 139 | if doc_token.is_stop or doc_token.is_punct: 140 | classes = ["stopword"] 141 | if self.gray_out_stopwords: 142 | classes.append("grayed-out") 143 | el = span( 144 | _class=" ".join(classes) 145 | )( 146 | doc_token.text 147 | ) 148 | 149 | else: 150 | matches = doc_token_idx_to_matches.get(doc_token_idx) 151 | if matches: 152 | summary_token_idx, sim = max(matches, key=itemgetter(1)) 153 | sent_idx = token_idx_to_sent_idx[summary_token_idx] 154 | color_primary = get_color(sent_idx) 155 | highlight_color_primary = color_with_opacity(color_primary, sim) 156 | props = { 157 | 'data-highlight-id': str(doc_token_idx), 158 | 'data-primary-color': highlight_color_primary 159 | } 160 | match_classes = [] 161 | for summary_token_idx, sim in matches: 162 | sent_idx = token_idx_to_sent_idx[summary_token_idx] 163 | match_classes.append(f"summary-highlight-{summary_idx}-{summary_token_idx}") 164 | color = color_with_opacity(get_color(sent_idx), sim) 165 | props[f"data-color-{summary_idx}-{summary_token_idx}"] = color 166 | props["data-match-classes"] = " ".join(match_classes) 167 | el = self._highlight( 168 | doc_token.text, 169 | 
highlight_color_primary, 170 | color_primary, 171 | match_classes + ["annotation-hidden"], 172 | **props 173 | ) 174 | else: 175 | el = doc_token.text 176 | token_elements.append(el) 177 | 178 | spans = [] 179 | if self.lexical_alignments is not None: 180 | lexical_alignment = self.lexical_alignments[summary_idx] 181 | for summary_span, doc_spans in lexical_alignment.items(): 182 | summary_span_start, summary_span_end = summary_span 183 | span_id = f"{summary_idx}-{summary_span_start}-{summary_span_end}" 184 | sent_idx = token_idx_to_sent_idx[summary_span_start] 185 | for doc_span_start, doc_span_end in doc_spans: 186 | spans.append(( 187 | doc_span_start, 188 | doc_span_end, 189 | sent_idx, 190 | get_color(sent_idx), 191 | span_id 192 | )) 193 | token_elements = mu.markup(token_elements, spans) 194 | 195 | classes = ["main-doc", "bordered"] 196 | if self.scroll: 197 | classes.append("scroll") 198 | 199 | main_doc = div( 200 | _class=" ".join(classes) 201 | )( 202 | token_elements 203 | ), 204 | 205 | classes = ["doc"] 206 | if is_selected_summary: 207 | classes.append("display") 208 | else: 209 | classes.append("nodisplay") 210 | doc_elements.append( 211 | div( 212 | **{ 213 | "class": " ".join(classes), 214 | "data-index": summary_idx 215 | } 216 | )( 217 | main_doc, 218 | div(_class="proxy-doc"), 219 | div(_class="proxy-scroll") 220 | ) 221 | ) 222 | 223 | summary_title = "Summary" 224 | summary_header = div( 225 | id_="summary-header" 226 | )( 227 | summary_title, 228 | div(id="summary-header-gap"), 229 | ) 230 | 231 | summary_items = [] 232 | for summary_idx, summary in enumerate(self.summaries): 233 | token_idx_to_sent_idx = {} 234 | for sent_idx, sent in enumerate(summary.sents): 235 | for token in sent: 236 | token_idx_to_sent_idx[token.i] = sent_idx 237 | 238 | spans = [] 239 | matches_ngram = [False] * len(list(summary)) 240 | if self.lexical_alignments is not None: 241 | lexical_alignment = self.lexical_alignments[summary_idx] 242 | for summary_span in 
lexical_alignment.keys(): 243 | start, end = summary_span 244 | matches_ngram[slice(start, end)] = [True] * (end - start) 245 | span_id = f"{summary_idx}-{start}-{end}" 246 | sent_idx = token_idx_to_sent_idx[start] 247 | spans.append(( 248 | start, 249 | end, 250 | sent_idx, 251 | get_color(sent_idx), 252 | span_id 253 | )) 254 | 255 | if self.semantic_alignments is not None: 256 | semantic_alignment = self.semantic_alignments[summary_idx] 257 | else: 258 | semantic_alignment = {} 259 | token_elements = [] 260 | for token_idx, token in enumerate(summary): 261 | if token.is_stop or token.is_punct: 262 | classes = ["stopword"] 263 | if self.gray_out_stopwords: 264 | classes.append("grayed-out") 265 | el = span( 266 | _class=" ".join(classes) 267 | )( 268 | token.text 269 | ) 270 | else: 271 | classes = [] 272 | if token.ent_iob_ in ('I', 'B'): 273 | classes.append("entity") 274 | if matches_ngram[token_idx]: 275 | classes.append("matches-ngram") 276 | matches = semantic_alignment.get(token_idx) 277 | if matches: 278 | top_match = max(matches, key=itemgetter(1)) 279 | top_sim = max(top_match[1], 0) 280 | top_doc_token_idx = top_match[0] 281 | props = { 282 | "data-highlight-id": f"{summary_idx}-{token_idx}", 283 | "data-top-doc-highlight-id": str(top_doc_token_idx), 284 | "data-top-doc-sim": f"{top_sim:.2f}", 285 | } 286 | classes.extend([ 287 | "annotation-hidden", 288 | f"summary-highlight-{summary_idx}-{token_idx}" 289 | ]) 290 | sent_idx = token_idx_to_sent_idx[token_idx] 291 | el = self._highlight( 292 | token.text, 293 | color_with_opacity(get_color(sent_idx), top_sim), 294 | color_with_opacity(get_color(sent_idx), 1), 295 | classes, 296 | **props 297 | ) 298 | else: 299 | if classes: 300 | el = span(_class=" ".join(classes))(token.text) 301 | else: 302 | el = token.text 303 | token_elements.append(el) 304 | 305 | token_elements = mu.markup(token_elements, spans) 306 | 307 | classes = ["summary-item"] 308 | if summary_idx == 0: # Default is for first summary to 
be selected 309 | classes.append("selected") 310 | 311 | summary_items.append( 312 | div( 313 | **{"class": ' '.join(classes), "data-index": summary_idx} 314 | )( 315 | div(_class="name")(summary._.name), 316 | div(_class="content")(token_elements) 317 | ) 318 | ) 319 | classes = ["summary-list", "bordered"] 320 | if self.scroll: 321 | classes.append("scroll") 322 | if self.lexical_alignments is not None: 323 | classes.append("has-lexical-alignment") 324 | if self.semantic_alignments is not None: 325 | classes.append("has-semantic-alignment") 326 | summary_list = div( 327 | _class=" ".join(classes) 328 | )( 329 | summary_items 330 | ) 331 | 332 | annotation_key = \ 333 | """ 334 |
    335 |
  • Annotations:
  • 336 |
  • 337 | N-Gram overlap 338 |
  • 339 |
  • 340 | Semantic overlap 341 |
  • 342 |
  • 343 | Novel words 344 |
  • 345 |
  • 346 | Novel entities 347 |
  • 348 | 349 |
SPACE = " "


class MultiUnderline:
    """Renders stacked colored underlines beneath a token stream.

    Each "slot" is one vertical layer of underline; overlapping spans are
    assigned to slots so that a span shares a slot only with spans that
    strictly contain it (or exactly match it with the same color).
    """

    def __init__(
        self,
        underline_thickness=3,
        underline_spacing=1
    ):
        self.underline_thickness = underline_thickness  # px height of each underline
        self.underline_spacing = underline_spacing  # px gap between stacked underlines

    def markup(
        self,
        tokens: List[Union[str, HtmlElement]],
        spans: List[Tuple[int, int, int, str, str]]
    ):
        """Style text with multiple layers of colored underlines.

        Args:
            tokens: list of tokens, either string or html element
            spans: list of (start_pos, end_pos, rank, color, id) tuples defined as:
                start_pos: start position of underline span
                end_pos: end position of underline span
                rank: rank for stacking order of underlines, all else being equal
                color: color of underline
                id: id of underline (encoded as a class label in resulting html element)
        Returns:
            List of HTML elements
        """

        # Map from span start position to span
        start_to_spans = defaultdict(list)
        for span in spans:  # NOTE: local `span` shadows htbuilder.span inside this method
            start = span[0]
            start_to_spans[start].append(span)

        # Map from each underline slot position to list of active spans
        slot_to_spans = {}

        # Collection of html elements
        elements = []

        first_token_in_line = True
        for pos, token in enumerate(tokens):
            # Remove spans that are no longer active (end < pos)
            # (rebound to a defaultdict so new slot indices can be created below)
            slot_to_spans = defaultdict(
                list,
                {
                    slot: [span for span in spans if span[1] > pos]  # span[1] contains end of spans
                    for slot, spans in slot_to_spans.items() if spans
                }
            )

            # Add underlines to space between tokens for any continuing underlines
            if first_token_in_line:
                first_token_in_line = False
            else:
                elements.append(self._get_underline_element(SPACE, slot_to_spans))

            # Find slot for any new spans
            new_spans = start_to_spans.pop(pos, None)
            if new_spans:
                new_spans.sort(
                    key=lambda span: (-(span[1] - span[0]), span[2]))  # Sort by span length (reversed), rank
                for new_span in new_spans:
                    # Find an existing slot or add a new one
                    for slot, spans in sorted(slot_to_spans.items(), key=itemgetter(0)):  # Sort by slot index
                        if spans:
                            containing_span = spans[
                                0]  # The first span in the slot strictly contains all other spans
                            containing_start, containing_end = containing_span[0:2]
                            containing_color = containing_span[3]
                            start, end = new_span[0:2]
                            color = new_span[3]
                            # If the new span (1) is strictly contained in this span, or (2) exactly matches this span
                            # and is the same color, then add span to this slot
                            if end <= containing_end and (
                                    (start > containing_start or end < containing_end) or
                                    (start == containing_start and end == containing_end and color == containing_color)
                            ):
                                spans.append(new_span)
                                break
                    else:
                        # for/else: no compatible slot accepted the span —
                        # find a new slot index to add the span
                        for slot_index in count():
                            spans = slot_to_spans[slot_index]
                            if not spans:  # If slot is free, take it
                                spans.append(new_span)
                                break
            if token in ("\n", "\r", "\r\n"):
                # Line break: emit <br> and suppress the next inter-token underline
                elements.append(br())
                first_token_in_line = True
            else:
                # Add underlines to token for all active spans
                elements.append(self._get_underline_element(token, slot_to_spans))
        return elements

    def _get_underline_element(self, token, slot_to_spans):
        """Wrap `token` in one nested span per slot, innermost = slot 0.

        Empty slots get a transparent underline so stacking heights stay aligned.
        """
        if not slot_to_spans:
            return token
        max_slot_index = max(slot_to_spans.keys())
        element = token
        for slot_index in range(max_slot_index + 1):
            spans = slot_to_spans[slot_index]
            if not spans:
                color = "rgba(0, 0, 0, 0)"  # Transparent element w/opacity=0
                props = {}
            else:
                containing_slot = spans[0]
                color = containing_slot[3]
                classes = ["underline"]
                if token != SPACE:
                    classes.append("token-underline")
                classes.extend([f"span-{span[4]}" for span in spans])  # Encode ids in class names
                props = {
                    "class": " ".join(classes),
                    "data-primary-color": color
                }
            if slot_index == 0:
                padding_bottom = 0
            else:
                padding_bottom = self.underline_spacing
            display = "inline-block"
            element = htbuilder.span(
                style=styles(
                    display=display,
                    border_bottom=f"{self.underline_thickness}px solid",
                    border_color=color,
                    padding_bottom=px(padding_bottom),
                ),
                **props
            )(element)

        # Return outermost nested span
        return element


if __name__ == "__main__":
    from htbuilder import div

    # Test
    text = "The quick brown fox jumps"
    tokens = text.split()
    tokens = [
        "The",
        htbuilder.span(style=styles(color="red"))("quick"),
        "brown",
        "fox",
        "jumps"
    ]
    spans = [
        (0, 2, 0, "green", "green1"),
        (1, 3, 0, "orange", "orange1"),
        (3, 4, 0, "red", "red1"),
        (2, 4, 0, "blue", "blue1"),
        (1, 5, 0, "orange", "orange1"),
    ]

    mu = MultiUnderline()
    html = str(div(mu.markup(tokens, spans)))
    print(html)
-------------------------------------------------------------------------------- /examples/wikinews/load.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Make sure to first run: python -m spacy download en_core_web_lg 4 | 5 | # Generate predictions from 4 models for wikinews.jsonl file 6 | python ../../generation.py --model pegasus-cnndm --data_path wikinews.jsonl && 7 | python ../../generation.py --model pegasus-xsum --data_path wikinews.jsonl && 8 | python ../../generation.py --model bart-cnndm --data_path wikinews.jsonl && 9 | python ../../generation.py --model bart-xsum --data_path wikinews.jsonl && 10 | 11 | # Join predictions with original dataset 12 | python ../../join.py \ 13 | --data_path wikinews.jsonl \ 14 | --generation_paths \ 15 | pegasus-cnndm.wikinews.predictions \ 16 | pegasus-xsum.wikinews.predictions \ 17 | bart-cnndm.wikinews.predictions \ 18 | bart-xsum.wikinews.predictions \ 19 | --output_path wikinews-decoded.jsonl && 20 | 21 | # Cache results 22 | python ../../preprocessing.py \ 23 | --workflow \ 24 | --dataset_jsonl wikinews-decoded.jsonl \ 25 | --processed_dataset_path wikinews.cache 26 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/meta.yaml: -------------------------------------------------------------------------------- 1 | column_dtypes: 2 | BertscoreAligner:spacy:document:spacy:summary:bart-cnndm: &id001 !!python/name:meerkat.columns.list_column.ListColumn '' 3 | BertscoreAligner:spacy:document:spacy:summary:bart-xsum: *id001 4 | BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm: *id001 5 | BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum: *id001 6 | BertscoreAligner:spacy:document:spacy:summary:reference: *id001 7 | BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm: *id001 8 | BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum: *id001 9 | 
BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm: *id001 10 | BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum: *id001 11 | NGramAligner:spacy:document:spacy:summary:bart-cnndm: *id001 12 | NGramAligner:spacy:document:spacy:summary:bart-xsum: *id001 13 | NGramAligner:spacy:document:spacy:summary:pegasus-cnndm: *id001 14 | NGramAligner:spacy:document:spacy:summary:pegasus-xsum: *id001 15 | NGramAligner:spacy:document:spacy:summary:reference: *id001 16 | NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm: *id001 17 | NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum: *id001 18 | NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm: *id001 19 | NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum: *id001 20 | StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm: *id001 21 | StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum: *id001 22 | StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm: *id001 23 | StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum: *id001 24 | StaticEmbeddingAligner:spacy:document:spacy:summary:reference: *id001 25 | StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm: *id001 26 | StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum: *id001 27 | StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm: *id001 28 | StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum: *id001 29 | document: &id002 !!python/name:meerkat.columns.pandas_column.PandasSeriesColumn '' 30 | preprocessed_document: *id002 31 | preprocessed_summary:bart-cnndm: *id002 32 | preprocessed_summary:bart-xsum: *id002 33 | preprocessed_summary:pegasus-cnndm: *id002 34 | preprocessed_summary:pegasus-xsum: *id002 35 | preprocessed_summary:reference: *id002 36 | spacy:document: &id003 !!python/name:meerkat.columns.spacy_column.SpacyColumn '' 37 | spacy:summary:bart-cnndm: *id003 38 
| spacy:summary:bart-xsum: *id003 39 | spacy:summary:pegasus-cnndm: *id003 40 | spacy:summary:pegasus-xsum: *id003 41 | spacy:summary:reference: *id003 42 | summary:bart-cnndm: *id002 43 | summary:bart-xsum: *id002 44 | summary:pegasus-cnndm: *id002 45 | summary:pegasus-xsum: *id002 46 | summary:reference: *id002 47 | dtype: !!python/name:meerkat.datapanel.DataPanel '' 48 | len: 1 49 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/blocks/140645734304016/data.feather: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/blocks/140645734304016/data.feather -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/blocks/140645734304016/meta.yaml: -------------------------------------------------------------------------------- 1 | klass: !!python/name:meerkat.block.pandas_block.PandasBlock '' 2 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | 
-------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:bart-xsum/state.dill -------------------------------------------------------------------------------- 
/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum/meta.yaml: 
-------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/state.dill 
-------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum/data.dill -------------------------------------------------------------------------------- 
/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/state.dill: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-cnndm/data.dill: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- 
/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:bart-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-xsum/data.dill: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:pegasus-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- 
/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum/data.dill: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: 
!!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/state.dill 
-------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum/data.dill -------------------------------------------------------------------------------- 
/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm/state.dill: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/data.dill: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | 
-------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum/state.dill 
-------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/data.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/data.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/data.dill 
-------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.list_column.ListColumn '' 2 | len: 1 3 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/document/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/document/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/preprocessed_document/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/preprocessed_document/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:bart-cnndm/state.dill: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:bart-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:bart-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:bart-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:pegasus-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:pegasus-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:pegasus-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:pegasus-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:reference/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/preprocessed_summary:reference/state.dill 
-------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:document/data.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/spacy:document/data.spacy -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:document/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.spacy_column.SpacyColumn '' 2 | len: 1 3 | state: 4 | _collate_fn: !!python/name:meerkat.mixins.collate.identity_collate '' 5 | _formatter: !!python/name:meerkat.display.auto_formatter '' 6 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:bart-cnndm/data.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:bart-cnndm/data.spacy -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:bart-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.spacy_column.SpacyColumn '' 2 | len: 1 3 | state: 4 | _collate_fn: !!python/name:meerkat.mixins.collate.identity_collate '' 5 | _formatter: !!python/name:meerkat.display.auto_formatter '' 6 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:bart-xsum/data.spacy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:bart-xsum/data.spacy -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:bart-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.spacy_column.SpacyColumn '' 2 | len: 1 3 | state: 4 | _collate_fn: !!python/name:meerkat.mixins.collate.identity_collate '' 5 | _formatter: !!python/name:meerkat.display.auto_formatter '' 6 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:pegasus-cnndm/data.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:pegasus-cnndm/data.spacy -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:pegasus-cnndm/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.spacy_column.SpacyColumn '' 2 | len: 1 3 | state: 4 | _collate_fn: !!python/name:meerkat.mixins.collate.identity_collate '' 5 | _formatter: !!python/name:meerkat.display.auto_formatter '' 6 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:pegasus-xsum/data.spacy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:pegasus-xsum/data.spacy -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:pegasus-xsum/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.spacy_column.SpacyColumn '' 2 | len: 1 3 | state: 4 | _collate_fn: !!python/name:meerkat.mixins.collate.identity_collate '' 5 | _formatter: !!python/name:meerkat.display.auto_formatter '' 6 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:reference/data.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:reference/data.spacy -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/spacy:summary:reference/meta.yaml: -------------------------------------------------------------------------------- 1 | dtype: !!python/name:meerkat.columns.spacy_column.SpacyColumn '' 2 | len: 1 3 | state: 4 | _collate_fn: !!python/name:meerkat.mixins.collate.identity_collate '' 5 | _formatter: !!python/name:meerkat.display.auto_formatter '' 6 | -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/summary:bart-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/summary:bart-cnndm/state.dill 
-------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/summary:bart-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/summary:bart-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/summary:pegasus-cnndm/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/summary:pegasus-cnndm/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/summary:pegasus-xsum/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/summary:pegasus-xsum/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/columns/summary:reference/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/mgr/columns/summary:reference/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.cache/mgr/meta.yaml: -------------------------------------------------------------------------------- 1 | _column_order: 2 | - summary:reference 3 | - document 4 | - summary:pegasus-cnndm 5 | - summary:pegasus-xsum 6 | - 
summary:bart-cnndm 7 | - summary:bart-xsum 8 | - preprocessed_document 9 | - preprocessed_summary:reference 10 | - preprocessed_summary:pegasus-cnndm 11 | - preprocessed_summary:pegasus-xsum 12 | - preprocessed_summary:bart-cnndm 13 | - preprocessed_summary:bart-xsum 14 | - spacy:document 15 | - spacy:summary:reference 16 | - spacy:summary:pegasus-cnndm 17 | - spacy:summary:pegasus-xsum 18 | - spacy:summary:bart-cnndm 19 | - spacy:summary:bart-xsum 20 | - BertscoreAligner:spacy:document:spacy:summary:reference 21 | - BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm 22 | - BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum 23 | - BertscoreAligner:spacy:document:spacy:summary:bart-cnndm 24 | - BertscoreAligner:spacy:document:spacy:summary:bart-xsum 25 | - BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm 26 | - BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum 27 | - BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm 28 | - BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum 29 | - StaticEmbeddingAligner:spacy:document:spacy:summary:reference 30 | - StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm 31 | - StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum 32 | - StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm 33 | - StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum 34 | - StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm 35 | - StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum 36 | - StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm 37 | - StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum 38 | - NGramAligner:spacy:document:spacy:summary:reference 39 | - NGramAligner:spacy:document:spacy:summary:pegasus-cnndm 40 | - NGramAligner:spacy:document:spacy:summary:pegasus-xsum 41 | - NGramAligner:spacy:document:spacy:summary:bart-cnndm 42 | - 
NGramAligner:spacy:document:spacy:summary:bart-xsum 43 | - NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm 44 | - NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum 45 | - NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm 46 | - NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum 47 | columns: 48 | BertscoreAligner:spacy:document:spacy:summary:bart-cnndm: 49 | dtype: &id001 !!python/name:meerkat.columns.list_column.ListColumn '' 50 | len: 1 51 | BertscoreAligner:spacy:document:spacy:summary:bart-xsum: 52 | dtype: *id001 53 | len: 1 54 | BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm: 55 | dtype: *id001 56 | len: 1 57 | BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum: 58 | dtype: *id001 59 | len: 1 60 | BertscoreAligner:spacy:document:spacy:summary:reference: 61 | dtype: *id001 62 | len: 1 63 | BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm: 64 | dtype: *id001 65 | len: 1 66 | BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum: 67 | dtype: *id001 68 | len: 1 69 | BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm: 70 | dtype: *id001 71 | len: 1 72 | BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum: 73 | dtype: *id001 74 | len: 1 75 | NGramAligner:spacy:document:spacy:summary:bart-cnndm: 76 | dtype: *id001 77 | len: 1 78 | NGramAligner:spacy:document:spacy:summary:bart-xsum: 79 | dtype: *id001 80 | len: 1 81 | NGramAligner:spacy:document:spacy:summary:pegasus-cnndm: 82 | dtype: *id001 83 | len: 1 84 | NGramAligner:spacy:document:spacy:summary:pegasus-xsum: 85 | dtype: *id001 86 | len: 1 87 | NGramAligner:spacy:document:spacy:summary:reference: 88 | dtype: *id001 89 | len: 1 90 | NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm: 91 | dtype: *id001 92 | len: 1 93 | NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum: 94 | dtype: *id001 95 | len: 1 96 | 
NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm: 97 | dtype: *id001 98 | len: 1 99 | NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum: 100 | dtype: *id001 101 | len: 1 102 | StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm: 103 | dtype: *id001 104 | len: 1 105 | StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum: 106 | dtype: *id001 107 | len: 1 108 | StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm: 109 | dtype: *id001 110 | len: 1 111 | StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum: 112 | dtype: *id001 113 | len: 1 114 | StaticEmbeddingAligner:spacy:document:spacy:summary:reference: 115 | dtype: *id001 116 | len: 1 117 | StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm: 118 | dtype: *id001 119 | len: 1 120 | StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum: 121 | dtype: *id001 122 | len: 1 123 | StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm: 124 | dtype: *id001 125 | len: 1 126 | StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum: 127 | dtype: *id001 128 | len: 1 129 | document: 130 | block: 131 | block_dir: blocks/140645734304016 132 | block_index: document 133 | mmap: false 134 | dtype: &id002 !!python/name:meerkat.columns.pandas_column.PandasSeriesColumn '' 135 | len: 1 136 | preprocessed_document: 137 | block: 138 | block_dir: blocks/140645734304016 139 | block_index: preprocessed_document 140 | mmap: false 141 | dtype: *id002 142 | len: 1 143 | preprocessed_summary:bart-cnndm: 144 | block: 145 | block_dir: blocks/140645734304016 146 | block_index: preprocessed_summary:bart-cnndm 147 | mmap: false 148 | dtype: *id002 149 | len: 1 150 | preprocessed_summary:bart-xsum: 151 | block: 152 | block_dir: blocks/140645734304016 153 | block_index: preprocessed_summary:bart-xsum 154 | mmap: false 155 | dtype: *id002 156 | len: 1 157 | preprocessed_summary:pegasus-cnndm: 158 | block: 
159 | block_dir: blocks/140645734304016 160 | block_index: preprocessed_summary:pegasus-cnndm 161 | mmap: false 162 | dtype: *id002 163 | len: 1 164 | preprocessed_summary:pegasus-xsum: 165 | block: 166 | block_dir: blocks/140645734304016 167 | block_index: preprocessed_summary:pegasus-xsum 168 | mmap: false 169 | dtype: *id002 170 | len: 1 171 | preprocessed_summary:reference: 172 | block: 173 | block_dir: blocks/140645734304016 174 | block_index: preprocessed_summary:reference 175 | mmap: false 176 | dtype: *id002 177 | len: 1 178 | spacy:document: 179 | dtype: &id003 !!python/name:meerkat.columns.spacy_column.SpacyColumn '' 180 | len: 1 181 | spacy:summary:bart-cnndm: 182 | dtype: *id003 183 | len: 1 184 | spacy:summary:bart-xsum: 185 | dtype: *id003 186 | len: 1 187 | spacy:summary:pegasus-cnndm: 188 | dtype: *id003 189 | len: 1 190 | spacy:summary:pegasus-xsum: 191 | dtype: *id003 192 | len: 1 193 | spacy:summary:reference: 194 | dtype: *id003 195 | len: 1 196 | summary:bart-cnndm: 197 | block: 198 | block_dir: blocks/140645734304016 199 | block_index: summary:bart-cnndm 200 | mmap: false 201 | dtype: *id002 202 | len: 1 203 | summary:bart-xsum: 204 | block: 205 | block_dir: blocks/140645734304016 206 | block_index: summary:bart-xsum 207 | mmap: false 208 | dtype: *id002 209 | len: 1 210 | summary:pegasus-cnndm: 211 | block: 212 | block_dir: blocks/140645734304016 213 | block_index: summary:pegasus-cnndm 214 | mmap: false 215 | dtype: *id002 216 | len: 1 217 | summary:pegasus-xsum: 218 | block: 219 | block_dir: blocks/140645734304016 220 | block_index: summary:pegasus-xsum 221 | mmap: false 222 | dtype: *id002 223 | len: 1 224 | summary:reference: 225 | block: 226 | block_dir: blocks/140645734304016 227 | block_index: summary:reference 228 | mmap: false 229 | dtype: *id002 230 | len: 1 231 | dtype: !!python/name:meerkat.block.manager.BlockManager '' 232 | -------------------------------------------------------------------------------- 
/examples/wikinews/wikinews.cache/state.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/examples/wikinews/wikinews.cache/state.dill -------------------------------------------------------------------------------- /examples/wikinews/wikinews.jsonl: -------------------------------------------------------------------------------- 1 | {"document": "Friday, July 20, 2007\n\nCristina Fern\u00e1ndez de Kirchner\n\nCurrent senator and Argentine First Lady Cristina Fernandez de Kirchner announced her presidential candidacy yesterday evening in La Plata, a city 50 kilometers (31 miles) away from Buenos Aires.\n\nMrs. Kirchner announced her intention to run for president at the Argentine Theatre, the same location she used to start her 2005 campaign for the Senate as member of the Buenos Aires province delegation.\n\nA large security detail was in place at the theatre and eight city blocks were closed to traffic for the event. Outgoing Argentine president N\u00e9stor Kirchner, members of his office, and provincial governors were present at the ceremony. Julio Cobos, governor of Mendoza and possible campaign team member, was also there.\n\nActivists from Quebracho mobilized in La Plata to protest the candidacy of the First Lady, and created some disturbances near the event. None of the protesters was arrested, according to an official spokesman of La Plata Police Station.\n\nRecent polls indicate that Mrs. 
Kirchner has at least 40 percent voter support, which bodes well for winning on the first ballot in the upcoming October elections.\n\nAccording to Argentine newspaper La Naci\u00f3n, this speech signifies that the government is attempting to show solidarity in the midst of recent scandals: Felisa Miceli's resignation to the Department of Economy after being accused of wrongdoing by a justice over the nearly $100,000 Argentine pesos and US$31,670 found in her office, and the alleged smuggling of weapons and irregularities in the office of the Secretary of Environment.", "summary:reference": "Argentina's first lady to launch presidential bid"} 2 | -------------------------------------------------------------------------------- /examples/xsum/load.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Make sure to first run: python -m spacy download en_core_web_lg 4 | 5 | # Dump one example from XSum validation split to .jsonl format. This may take several minutes if loading for first time. 
6 | python ../../preprocessing.py \ 7 | --standardize \ 8 | --dataset xsum \ 9 | --split validation \ 10 | --save_jsonl_path xsum.jsonl \ 11 | --n_samples 1 && 12 | 13 | # Generate predictions from 4 models 14 | python ../../generation.py --model pegasus-cnndm --data_path xsum.jsonl && 15 | python ../../generation.py --model pegasus-xsum --data_path xsum.jsonl && 16 | python ../../generation.py --model bart-cnndm --data_path xsum.jsonl && 17 | python ../../generation.py --model bart-xsum --data_path xsum.jsonl && 18 | 19 | # Join predictions with original dataset 20 | python ../../join.py \ 21 | --data_path xsum.jsonl \ 22 | --generation_paths \ 23 | pegasus-xsum.xsum.predictions \ 24 | pegasus-xsum.xsum.predictions \ 25 | bart-xsum.xsum.predictions \ 26 | bart-xsum.xsum.predictions \ 27 | --output_path xsum-decoded.jsonl 28 | 29 | # Cache results 30 | python ../../preprocessing.py \ 31 | --workflow \ 32 | --dataset_jsonl xsum-decoded.jsonl \ 33 | --processed_dataset_path xsum.cache 34 | -------------------------------------------------------------------------------- /generation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for decoding summarization models available through Huggingface Transformers. 
3 | 4 | To use with one of the 6 standard models: 5 | python generation.py --model --data_path 6 | where model abbreviation is one of: bart-xsum, bart-cnndm, pegasus-xsum, pegasus-cnndm, pegasus-newsroom, 7 | pegasus-multinews: 8 | 9 | To use with arbitrary model: 10 | python generation.py --model_name_or_path --data_path 11 | 12 | """ 13 | # !/usr/bin/env python 14 | # coding: utf-8 15 | 16 | import argparse 17 | import json 18 | import os 19 | 20 | import torch 21 | from tqdm import tqdm 22 | from transformers import AutoModelForSeq2SeqLM, AutoTokenizer 23 | 24 | BATCH_SIZE = 8 25 | DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' 26 | 27 | BART_CNNDM_CHECKPOINT = 'facebook/bart-large-cnn' 28 | BART_XSUM_CHECKPOINT = 'facebook/bart-large-xsum' 29 | PEGASUS_CNNDM_CHECKPOINT = 'google/pegasus-cnn_dailymail' 30 | PEGASUS_XSUM_CHECKPOINT = 'google/pegasus-xsum' 31 | PEGASUS_NEWSROOM_CHECKPOINT = 'google/pegasus-newsroom' 32 | PEGASUS_MULTINEWS_CHECKPOINT = 'google/pegasus-multi_news' 33 | 34 | MODEL_CHECKPOINTS = { 35 | 'bart-xsum': BART_XSUM_CHECKPOINT, 36 | 'bart-cnndm': BART_CNNDM_CHECKPOINT, 37 | 'pegasus-xsum': PEGASUS_XSUM_CHECKPOINT, 38 | 'pegasus-cnndm': PEGASUS_CNNDM_CHECKPOINT, 39 | 'pegasus-newsroom': PEGASUS_NEWSROOM_CHECKPOINT, 40 | 'pegasus-multinews': PEGASUS_MULTINEWS_CHECKPOINT 41 | } 42 | 43 | 44 | class JSONDataset(torch.utils.data.Dataset): 45 | def __init__(self, data_path): 46 | super(JSONDataset, self).__init__() 47 | 48 | with open(data_path) as fd: 49 | self.data = [json.loads(line) for line in fd] 50 | 51 | def __len__(self): 52 | return len(self.data) 53 | 54 | def __getitem__(self, idx): 55 | return self.data[idx] 56 | 57 | 58 | def postprocess_data(decoded): 59 | """ 60 | Remove generation artifacts and postprocess outputs 61 | 62 | :param decoded: model outputs 63 | """ 64 | return [x.replace('', ' ') for x in decoded] 65 | 66 | 67 | if __name__ == '__main__': 68 | parser = argparse.ArgumentParser(description='Process some 
integers.') 69 | parser.add_argument('--model', type=str) 70 | parser.add_argument('--model_name_or_path', type=str) 71 | parser.add_argument('--data_path', type=str) 72 | args = parser.parse_args() 73 | 74 | if not (args.model or args.model_name_or_path): 75 | raise ValueError('Model is required') 76 | 77 | if args.model and args.model_name_or_path: 78 | raise ValueError('Specify model or model_name_or_path but not both') 79 | 80 | # Load models & data 81 | if args.model: 82 | model_name_or_path = MODEL_CHECKPOINTS[args.model] 83 | file_model_name = args.model 84 | else: 85 | model_name_or_path = args.model_name_or_path 86 | file_model_name = model_name_or_path.replace("/", "-") 87 | model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path).to(DEVICE) 88 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 89 | 90 | dataset = JSONDataset(args.data_path) 91 | 92 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE) 93 | 94 | # Write out dataset 95 | file_dataset_name = os.path.splitext(os.path.basename(args.data_path))[0] 96 | filename = f'{file_model_name}.{file_dataset_name}.predictions' 97 | fd_out = open(filename, 'w') 98 | 99 | model.eval() 100 | with torch.no_grad(): 101 | for raw_data in tqdm(dataloader): 102 | batch = tokenizer(raw_data["document"], return_tensors="pt", truncation=True, padding="longest").to(DEVICE) 103 | summaries = model.generate(input_ids=batch.input_ids, attention_mask=batch.attention_mask) 104 | decoded = tokenizer.batch_decode(summaries, skip_special_tokens=True, clean_up_tokenization_spaces=False) 105 | for example in postprocess_data(decoded): 106 | fd_out.write(example + '\n') 107 | -------------------------------------------------------------------------------- /join.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for joining dataset of documents/reference summaries with generated summaries (likely from generate.py). 
3 | 4 | Usage with custom datasets in JSONL format: 5 | python join.py --data_path --generation_paths --output_path 6 | 7 | Optionally specify --model_names to override default model names. 8 | 9 | """ 10 | # !/usr/bin/env python 11 | # coding: utf-8 12 | 13 | import argparse 14 | import json 15 | import os 16 | from pathlib import Path 17 | 18 | import torch 19 | from tqdm import tqdm 20 | 21 | BATCH_SIZE = 8 22 | 23 | 24 | class JSONDataset(torch.utils.data.Dataset): 25 | def __init__(self, data_path): 26 | super(JSONDataset, self).__init__() 27 | 28 | with open(data_path) as fd: 29 | self.data = [json.loads(line) for line in fd] 30 | 31 | def __len__(self): 32 | return len(self.data) 33 | 34 | def __getitem__(self, idx): 35 | return self.data[idx] 36 | 37 | 38 | if __name__ == '__main__': 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--data_path', type=str) 41 | parser.add_argument('--generation_paths', type=str, nargs="+", required=True) 42 | parser.add_argument('--output_path', type=str, required=True) 43 | parser.add_argument('--model_names', type=str, nargs="+") 44 | args = parser.parse_args() 45 | 46 | if args.model_names and len(args.generation_paths) != len(args.model_names): 47 | raise ValueError('Length of args.generation_paths must equal length of args.model_names') 48 | 49 | if args.model_names: 50 | model_names = args.model_names 51 | else: 52 | model_names = [Path(p).name.split(".")[0] for p in args.generation_paths] 53 | 54 | args.dataset = os.path.splitext(os.path.basename(args.data_path))[0] 55 | args.split = 'user' 56 | 57 | # Load data 58 | 59 | dataset = JSONDataset(args.data_path) 60 | 61 | # Join files and write out single jsonl dataset 62 | 63 | generation_files = [open(fname) for fname in args.generation_paths] 64 | 65 | with open(args.output_path, 'w') as outp: 66 | for row in tqdm(zip(dataset, *generation_files)): 67 | # Process each original data record in parallel with generation(s) of the model(s) 68 | result = {} 
69 | data = row[0] 70 | generations = row[1:] 71 | result['summary:reference'] = data['summary:reference'] 72 | result['document'] = data['document'] 73 | for model_name, gen in zip(model_names, generations): 74 | result[f'summary:{model_name}'] = gen 75 | outp.write( 76 | json.dumps(result) + '\n' 77 | ) 78 | 79 | for file in generation_files: 80 | file.close() 81 | -------------------------------------------------------------------------------- /preprocessing.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from argparse import ArgumentParser 3 | from typing import List 4 | 5 | from meerkat import DataPanel, SpacyColumn 6 | from meerkat.logging.utils import set_logging_level 7 | from spacy import load 8 | 9 | from align import BertscoreAligner, NGramAligner, StaticEmbeddingAligner, Aligner 10 | from utils import clean_text 11 | 12 | set_logging_level('critical') 13 | logger = logging.getLogger(__name__) 14 | logger.setLevel(logging.CRITICAL) 15 | 16 | 17 | def _run_aligners( 18 | dataset: DataPanel, 19 | aligners: List[Aligner], 20 | doc_column: str, 21 | reference_column: str, 22 | summary_columns: List[str] = None, 23 | ): 24 | if not summary_columns: 25 | summary_columns = [] 26 | 27 | to_columns = [] 28 | if reference_column is not None: 29 | to_columns.append(reference_column) 30 | to_columns.extend(summary_columns) 31 | 32 | for aligner in aligners: 33 | 34 | # Run the aligner on (document, summary) pairs 35 | dataset = dataset.update( 36 | lambda x: { 37 | f'{type(aligner).__name__}:{doc_column}:{to_columns}': 38 | aligner.align( 39 | x[doc_column], 40 | [x[col] for col in to_columns], 41 | ), 42 | }, 43 | ) 44 | 45 | if reference_column is not None and len(summary_columns): 46 | # Run the aligner on (reference, summary) pairs 47 | dataset = dataset.update( 48 | lambda x: { 49 | f'{type(aligner).__name__}:{reference_column}:{summary_columns}': aligner.align( 50 | x[reference_column], 51 | [x[col] 
def load_nlp():
    """Load the large English spaCy pipeline.

    Returns:
        The loaded ``en_core_web_lg`` spaCy ``Language`` object.

    Raises:
        OSError: if the model is not installed, with install instructions.
    """
    try:
        return load('en_core_web_lg')
    except OSError:
        # Fix: the original message had a misplaced closing quote
        # ("'en_core_web_lg model'") and no space between the two sentences
        # ("...cached file.To install...").
        raise OSError(
            "'en_core_web_lg' model is required unless loading from cached file. "
            "To install: 'python -m spacy download en_core_web_lg'"
        )


def run_workflow(
    jsonl_path: str,
    doc_column: str = None,
    reference_column: str = None,
    summary_columns: List[str] = None,
    bert_aligner_threshold: float = 0.5,
    bert_aligner_top_k: int = 3,
    embedding_aligner_threshold: float = 0.5,
    embedding_aligner_top_k: int = 3,
    processed_dataset_path: str = None,
    n_samples: int = None,
    no_clean: bool = None,
):
    """Preprocess a jsonl summarization dataset and compute alignments.

    Loads the dataset, infers missing column names, cleans and spaCy-processes
    every text column, runs the three aligners (BERTScore, static embedding,
    n-gram), and writes the result to ``processed_dataset_path``.

    Args:
        jsonl_path: path to the input jsonl dataset (required).
        doc_column: document column name; defaults to 'document'.
        reference_column: reference summary column name; defaults to
            'summary:reference' and is dropped if absent from the data.
        summary_columns: model-summary column names; defaults to every column
            prefixed 'summary:' except 'summary:reference'.
        bert_aligner_threshold: minimum similarity for BertscoreAligner.
        bert_aligner_top_k: top-k for BertscoreAligner.
        embedding_aligner_threshold: minimum similarity for StaticEmbeddingAligner.
        embedding_aligner_top_k: top-k for StaticEmbeddingAligner.
        processed_dataset_path: where to write the processed dataset (required).
        n_samples: optionally restrict to the first n samples.
        no_clean: skip text cleaning when True. Defaults to None, which falls
            back to the CLI-global ``args.no_clean`` when run as a script.

    Returns:
        The processed DataPanel (also written to ``processed_dataset_path``).

    Raises:
        ValueError: if ``jsonl_path`` or ``processed_dataset_path`` is missing,
            or no summary/reference column can be found.
    """
    if not jsonl_path:
        raise ValueError("'jsonl_path' is required")

    if not processed_dataset_path:
        raise ValueError("Please specify a path to save the dataset.")

    # Fix: the original body read the module-level `args` (set only in the
    # __main__ block), which raises NameError when this function is imported
    # as a library. Resolve it lazily, preserving the CLI behavior.
    if no_clean is None:
        no_clean = getattr(globals().get('args'), 'no_clean', False)

    # Load the dataset
    dataset = DataPanel.from_jsonl(jsonl_path)

    if doc_column is None:
        # Assume `doc_column` is called "document"
        doc_column = 'document'
        assert doc_column in dataset.columns, \
            f"`doc_column={doc_column}` is not a column in datapanel."
        print("Assuming `doc_column` is called 'document'.")

    if reference_column is None:
        # Assume `reference_column` is called "summary:reference"
        reference_column = 'summary:reference'
        print("Assuming `reference_column` is called 'summary:reference'.")
        # The assumed default may simply not exist; degrade gracefully.
        if reference_column not in dataset.columns:
            print("No reference summary loaded")
            reference_column = None

    if summary_columns is None or len(summary_columns) == 0:
        # Assume `summary_columns` are prefixed by "summary:"
        summary_columns = [
            col for col in dataset.columns
            if col.startswith("summary:") and col != "summary:reference"
        ]
        print(f"Reading summary columns from datapanel. Found {summary_columns}.")

    if len(summary_columns) == 0 and reference_column is None:
        raise ValueError("At least one summary is required")

    # Restrict to the first `n_samples`
    if n_samples:
        print(f"Restricting to {n_samples} samples.")
        dataset = dataset.head(n_samples)

    print("size of dataset:", len(dataset))

    # Combine the text columns into one list
    text_columns = [doc_column] + ([reference_column] if reference_column else []) + summary_columns

    # Preprocessing all the text columns
    print("Preprocessing text columns")
    dataset = dataset.update(
        lambda x: {
            f'preprocessed_{k}': x[k] if no_clean else clean_text(x[k])
            for k in text_columns
        }
    )

    # Run the Spacy pipeline on all preprocessed text columns
    nlp = load_nlp()

    # NOTE(review): sentencizer is inserted before the parser — presumably for
    # deterministic sentence boundaries; confirm against aligner requirements.
    nlp.add_pipe('sentencizer', before="parser")

    print("Running spacy processing")
    for col in text_columns:
        dataset.add_column(
            f'spacy:{col}',
            SpacyColumn.from_docs(nlp.pipe(dataset[f'preprocessed_{col}']))
        )

    # Run the 3 align pipelines
    bert_aligner = BertscoreAligner(
        threshold=bert_aligner_threshold,
        top_k=bert_aligner_top_k,
    )

    embedding_aligner = StaticEmbeddingAligner(
        threshold=embedding_aligner_threshold,
        top_k=embedding_aligner_top_k,
    )

    ngram_aligner = NGramAligner()

    dataset = _run_aligners(
        dataset=dataset,
        aligners=[bert_aligner, embedding_aligner, ngram_aligner],
        doc_column=f'spacy:{doc_column}',
        reference_column=f'spacy:{reference_column}' if reference_column else None,
        summary_columns=[f'spacy:{col}' for col in summary_columns],
    )

    # Save the dataset
    dataset.write(processed_dataset_path)

    return dataset
dataset_split: str, 207 | save_jsonl_path: str, 208 | doc_column: str = None, 209 | reference_column: str = None, 210 | n_samples: int = None 211 | 212 | ): 213 | """Load a dataset from Huggingface and dump it to disk.""" 214 | 215 | if args.dataset is None or \ 216 | args.split is None or \ 217 | args.save_jsonl_path is None: 218 | raise ValueError('Missing command line argument') 219 | 220 | # Load the dataset from Huggingface 221 | dataset = get_dataset( 222 | dataset_name=dataset_name, 223 | dataset_version=dataset_version, 224 | dataset_split=dataset_split 225 | ) 226 | if n_samples: 227 | dataset = dataset[:n_samples] 228 | 229 | if doc_column is None: 230 | if reference_column is not None: 231 | raise ValueError("You must specify `doc_column` if you specify `reference_column`") 232 | try: 233 | doc_column, reference_column = { 234 | 'cnn_dailymail': ('article', 'highlights'), 235 | 'xsum': ('document', 'summary') 236 | }[dataset_name] 237 | except: 238 | raise NotImplementedError( 239 | "Please specify `doc_column`." 240 | ) 241 | 242 | # Rename the columns 243 | if doc_column != 'document': 244 | dataset.add_column('document', dataset[doc_column]) 245 | dataset.remove_column(doc_column) 246 | dataset.add_column('summary:reference', dataset[reference_column]) 247 | dataset.remove_column(reference_column) 248 | 249 | # Save the dataset back to disk 250 | dataset.to_jsonl(save_jsonl_path) 251 | return dataset 252 | 253 | 254 | def get_dataset( 255 | dataset_name: str = None, 256 | dataset_version: str = None, 257 | dataset_split: str = 'test', 258 | dataset_jsonl: str = None, 259 | ): 260 | """Load a dataset.""" 261 | assert (dataset_name is not None) != (dataset_jsonl is not None), \ 262 | "Specify one of `dataset_name` or `dataset_jsonl`." 
def get_dataset(
    dataset_name: str = None,
    dataset_version: str = None,
    dataset_split: str = 'test',
    dataset_jsonl: str = None,
):
    """Load a dataset from Huggingface or from a local jsonl file.

    Exactly one of `dataset_name` and `dataset_jsonl` must be given.
    """
    assert (dataset_name is not None) != (dataset_jsonl is not None), \
        "Specify one of `dataset_name` or `dataset_jsonl`."

    if dataset_name is not None:
        # Remote source: fetch from the Huggingface hub.
        return get_hf_dataset(dataset_name, dataset_version, dataset_split)

    # Local source: read the jsonl file from disk.
    return DataPanel.from_jsonl(json_path=dataset_jsonl)


def get_hf_dataset(name: str, version: str = None, split: str = 'test'):
    """Fetch a dataset split from Huggingface, optionally pinned to a version."""
    if not version:
        return DataPanel.from_huggingface(name, split=split)
    return DataPanel.from_huggingface(name, version, split=split)


if __name__ == '__main__':
    parser = ArgumentParser()
    add = parser.add_argument  # local alias keeps the option table compact

    # Dataset selection
    add('--dataset', type=str, choices=['cnn_dailymail', 'xsum'],
        help="Huggingface dataset name.")
    add('--version', type=str,
        help="Huggingface dataset version.")
    add('--split', type=str, default='test',
        help="Huggingface dataset split.")
    add('--dataset_jsonl', type=str,
        help="Path to a jsonl file for the dataset.")
    add('--save_jsonl_path', type=str,
        help="Path to save the processed jsonl dataset.")
    add('--doc_column', type=str,
        help="Name of the document column in the dataset.")
    add('--reference_column', type=str,
        help="Name of the reference summary column in the dataset.")
    add('--summary_columns', nargs='+', default=[],
        help="Name of other summary columns in/added to the dataset.")

    # Aligner hyperparameters
    add('--bert_aligner_threshold', type=float, default=0.1,
        help="Minimum threshold for BERT alignment.")
    add('--bert_aligner_top_k', type=int, default=10,
        help="Top-k for BERT alignment.")
    add('--embedding_aligner_threshold', type=float, default=0.1,
        help="Minimum threshold for embedding alignment.")
    add('--embedding_aligner_top_k', type=int, default=10,
        help="Top-k for embedding alignment.")
    add('--processed_dataset_path', type=str,
        help="Path to store the final processed dataset.")
    add('--n_samples', type=int,
        help="Number of dataset samples to process.")

    # Mode flags
    add('--workflow', action='store_true', default=False,
        help="Whether to run the preprocessing workflow.")
    add('--standardize', action='store_true', default=False,
        help="Whether to standardize the dataset and save to jsonl.")
    add('--no_clean', action='store_true', default=False,
        help="Do not clean text (remove extraneous spaces, newlines).")

    args = parser.parse_args()

    if args.standardize:
        # Dump a Huggingface dataset to standardized jsonl format
        standardize_dataset(
            dataset_name=args.dataset,
            dataset_version=args.version,
            dataset_split=args.split,
            save_jsonl_path=args.save_jsonl_path,
            doc_column=args.doc_column,
            reference_column=args.reference_column,
            n_samples=args.n_samples
        )

    if args.workflow:
        # Run the processing workflow
        run_workflow(
            jsonl_path=args.dataset_jsonl,
            doc_column=args.doc_column,
            reference_column=args.reference_column,
            summary_columns=args.summary_columns,
            bert_aligner_threshold=args.bert_aligner_threshold,
            bert_aligner_top_k=args.bert_aligner_top_k,
            embedding_aligner_threshold=args.embedding_aligner_threshold,
            embedding_aligner_top_k=args.embedding_aligner_top_k,
            processed_dataset_path=args.processed_dataset_path,
            n_samples=args.n_samples
        )
preprocessing/cnn_dailymail_1000.validation.anonymized.zip && 4 | unzip -o preprocessing/cnn_dailymail_1000.validation.anonymized.zip -d preprocessing/ && 5 | python preprocessing.py \ 6 | --deanonymize \ 7 | --dataset_rg preprocessing/cnn_dailymail_1000.validation.anonymized \ 8 | --dataset cnn_dailymail \ 9 | --version 3.0.0 \ 10 | --split validation \ 11 | --processed_dataset_path data/cnn_dailymail_10.validation \ 12 | --n_samples 10 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # environment.yml must be kept in sync 2 | spacy==3.0.3 3 | streamlit==0.77.0 4 | st-annotated-text==1.1.0 5 | transformers==4.2.2 6 | datasets==1.18.4 7 | torch>=1.8.0,<2.0.0 8 | bert-score==0.3.7 9 | rouge-score==0.0.4 10 | toolz==0.11.1 11 | nltk==3.4.5 12 | meerkat-ml==0.2.4 13 | sentencepiece==0.1.95 14 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz 15 | protobuf~=3.19.0 16 | -------------------------------------------------------------------------------- /resources/jquery.color-2.1.2.min.js: -------------------------------------------------------------------------------- 1 | /*! 
jQuery Color v@2.1.2 http://github.com/jquery/jquery-color | jquery.org/license */ 2 | (function(a,b){function m(a,b,c){var d=h[b.type]||{};return a==null?c||!b.def?null:b.def:(a=d.floor?~~a:parseFloat(a),isNaN(a)?b.def:d.mod?(a+d.mod)%d.mod:0>a?0:d.max")[0],k,l=a.each;j.style.cssText="background-color:rgba(1,1,1,.5)",i.rgba=j.style.backgroundColor.indexOf("rgba")>-1,l(g,function(a,b){b.cache="_"+a,b.props.alpha={idx:3,type:"percent",def:1}}),f.fn=a.extend(f.prototype,{parse:function(c,d,e,h){if(c===b)return this._rgba=[null,null,null,null],this;if(c.jquery||c.nodeType)c=a(c).css(d),d=b;var i=this,j=a.type(c),o=this._rgba=[];d!==b&&(c=[c,d,e,h],j="array");if(j==="string")return this.parse(n(c)||k._default);if(j==="array")return l(g.rgba.props,function(a,b){o[b.idx]=m(c[b.idx],b)}),this;if(j==="object")return c instanceof f?l(g,function(a,b){c[b.cache]&&(i[b.cache]=c[b.cache].slice())}):l(g,function(b,d){var e=d.cache;l(d.props,function(a,b){if(!i[e]&&d.to){if(a==="alpha"||c[a]==null)return;i[e]=d.to(i._rgba)}i[e][b.idx]=m(c[a],b,!0)}),i[e]&&a.inArray(null,i[e].slice(0,3))<0&&(i[e][3]=1,d.from&&(i._rgba=d.from(i[e])))}),this},is:function(a){var b=f(a),c=!0,d=this;return l(g,function(a,e){var f,g=b[e.cache];return g&&(f=d[e.cache]||e.to&&e.to(d._rgba)||[],l(e.props,function(a,b){if(g[b.idx]!=null)return c=g[b.idx]===f[b.idx],c})),c}),c},_space:function(){var a=[],b=this;return l(g,function(c,d){b[d.cache]&&a.push(c)}),a.pop()},transition:function(a,b){var c=f(a),d=c._space(),e=g[d],i=this.alpha()===0?f("transparent"):this,j=i[e.cache]||e.to(i._rgba),k=j.slice();return c=c[e.cache],l(e.props,function(a,d){var e=d.idx,f=j[e],g=c[e],i=h[d.type]||{};if(g===null)return;f===null?k[e]=g:(i.mod&&(g-f>i.mod/2?f+=i.mod:f-g>i.mod/2&&(f-=i.mod)),k[e]=m((g-f)*b+f,d))}),this[d](k)},blend:function(b){if(this._rgba[3]===1)return this;var c=this._rgba.slice(),d=c.pop(),e=f(b)._rgba;return f(a.map(c,function(a,b){return(1-d)*e[b]+d*a}))},toRgbaString:function(){var 
b="rgba(",c=a.map(this._rgba,function(a,b){return a==null?b>2?1:0:a});return c[3]===1&&(c.pop(),b="rgb("),b+c.join()+")"},toHslaString:function(){var b="hsla(",c=a.map(this.hsla(),function(a,b){return a==null&&(a=b>2?1:0),b&&b<3&&(a=Math.round(a*100)+"%"),a});return c[3]===1&&(c.pop(),b="hsl("),b+c.join()+")"},toHexString:function(b){var c=this._rgba.slice(),d=c.pop();return b&&c.push(~~(d*255)),"#"+a.map(c,function(a){return a=(a||0).toString(16),a.length===1?"0"+a:a}).join("")},toString:function(){return this._rgba[3]===0?"transparent":this.toRgbaString()}}),f.fn.parse.prototype=f.fn,g.hsla.to=function(a){if(a[0]==null||a[1]==null||a[2]==null)return[null,null,null,a[3]];var b=a[0]/255,c=a[1]/255,d=a[2]/255,e=a[3],f=Math.max(b,c,d),g=Math.min(b,c,d),h=f-g,i=f+g,j=i*.5,k,l;return g===f?k=0:b===f?k=60*(c-d)/h+360:c===f?k=60*(d-b)/h+120:k=60*(b-c)/h+240,h===0?l=0:j<=.5?l=h/i:l=h/(2-i),[Math.round(k)%360,l,j,e==null?1:e]},g.hsla.from=function(a){if(a[0]==null||a[1]==null||a[2]==null)return[null,null,null,a[3]];var b=a[0]/360,c=a[1],d=a[2],e=a[3],f=d<=.5?d*(1+c):d+c-d*c,g=2*d-f;return[Math.round(o(g,f,b+1/3)*255),Math.round(o(g,f,b)*255),Math.round(o(g,f,b-1/3)*255),e]},l(g,function(c,e){var g=e.props,h=e.cache,i=e.to,j=e.from;f.fn[c]=function(c){i&&!this[h]&&(this[h]=i(this._rgba));if(c===b)return this[h].slice();var d,e=a.type(c),k=e==="array"||e==="object"?c:arguments,n=this[h].slice();return l(g,function(a,b){var c=k[e==="object"?a:b.idx];c==null&&(c=n[b.idx]),n[b.idx]=m(c,b)}),j?(d=f(j(n)),d[h]=n,d):f(n)},l(g,function(b,e){if(f.fn[b])return;f.fn[b]=function(f){var g=a.type(f),h=b==="alpha"?this._hsla?"hsla":"rgba":c,i=this[h](),j=i[e.idx],k;return g==="undefined"?j:(g==="function"&&(f=f.call(this,j),g=a.type(f)),f==null&&e.empty?this:(g==="string"&&(k=d.exec(f),k&&(f=j+parseFloat(k[2])*(k[1]==="+"?1:-1))),i[e.idx]=f,this[h](i)))}})}),f.hook=function(b){var c=b.split(" ");l(c,function(b,c){a.cssHooks[c]={set:function(b,d){var 
e,g,h="";if(d!=="transparent"&&(a.type(d)!=="string"||(e=n(d)))){d=f(e||d);if(!i.rgba&&d._rgba[3]!==1){g=c==="backgroundColor"?b.parentNode:b;while((h===""||h==="transparent")&&g&&g.style)try{h=a.css(g,"backgroundColor"),g=g.parentNode}catch(j){}d=d.blend(h&&h!=="transparent"?h:"_default")}d=d.toRgbaString()}try{b.style[c]=d}catch(j){}}},a.fx.step[c]=function(b){b.colorInit||(b.start=f(b.elem,c),b.end=f(b.end),b.colorInit=!0),a.cssHooks[c].set(b.elem,b.start.transition(b.end,b.pos))}})},f.hook(c),a.cssHooks.borderColor={expand:function(a){var b={};return l(["Top","Right","Bottom","Left"],function(c,d){b["border"+d+"Color"]=a}),b}},k=a.Color.names={aqua:"#00ffff",black:"#000000",blue:"#0000ff",fuchsia:"#ff00ff",gray:"#808080",green:"#008000",lime:"#00ff00",maroon:"#800000",navy:"#000080",olive:"#808000",purple:"#800080",red:"#ff0000",silver:"#c0c0c0",teal:"#008080",white:"#ffffff",yellow:"#ffff00",transparent:[null,null,null,0],_default:"#ffffff"}})(jQuery); -------------------------------------------------------------------------------- /resources/summvis.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Roboto', sans-serif; 3 | font-weight: 400; 4 | line-height: 1.5; 5 | color: #262730; 6 | font-weight: 400; 7 | } 8 | 9 | .vis-container { 10 | height: 670px; 11 | background-color: #F5F7F9; 12 | } 13 | 14 | .nodisplay { 15 | display: none !important; 16 | } 17 | 18 | .scroll { 19 | overflow-y: scroll; 20 | } 21 | 22 | .doc-container { 23 | padding: 10px 20px; 24 | } 25 | 26 | .horizontal-layout .doc-container { 27 | padding-bottom: 0px; 28 | } 29 | 30 | .vertical-layout .doc-container { 31 | float: left; 32 | width: 50%; 33 | padding-right: 0px; 34 | } 35 | 36 | .summary-container { 37 | padding: 0px 20px; 38 | } 39 | 40 | .vertical-layout .summary-container { 41 | float: left; 42 | width: 50%; 43 | padding-top: 8px; 44 | } 45 | 46 | .vertical-layout .main-doc.scroll { 47 | height: 610px; 48 | } 49 | 
50 | .main-doc.scroll { 51 | scrollbar-width: none; 52 | } 53 | 54 | /* Works on Chrome, Edge, and Safari */ 55 | .main-doc.scroll::-webkit-scrollbar { 56 | width: 0; 57 | } 58 | 59 | .vertical-layout .proxy-doc { 60 | height: 610px; 61 | } 62 | 63 | .vertical-layout .summary-list.scroll { 64 | height: 610px; 65 | } 66 | 67 | .horizontal-layout .scroll { 68 | height: 270px; 69 | } 70 | 71 | .doc { 72 | display: flex; 73 | } 74 | 75 | .horizontal-layout .doc { 76 | } 77 | 78 | .main-doc { 79 | background-color: white; 80 | padding-left: 17px; 81 | padding-right: 15px; 82 | padding-top: 16px; 83 | border-top-left-radius: 4px; 84 | border-bottom-left-radius: 4px; 85 | flex: 1; 86 | border: 1px solid #e9e9e9; 87 | } 88 | 89 | .display .proxy-scroll { 90 | position: absolute; 91 | left: 9px; 92 | width: 9px; 93 | border-radius: 6px; 94 | background-color: rgba(0, 0, 0, 0.1); 95 | } 96 | 97 | .display .proxy-scroll.hover { 98 | background-color: rgba(0, 0, 0, 0.2); 99 | } 100 | 101 | .proxy-doc { 102 | flex: 0 0 28px; 103 | background-color: white; 104 | position: relative; 105 | border-bottom-right-radius: 4px; 106 | border-top-right-radius: 4px; 107 | padding-left: 3px; 108 | padding-right: 3px; 109 | border-top: 1px solid #e9e9e9; 110 | border-right: 1px solid #e9e9e9; 111 | border-bottom: 1px solid #e9e9e9; 112 | } 113 | 114 | .vertical-layout .proxy-doc { 115 | margin-right: 25px; 116 | } 117 | 118 | .summary-list { 119 | border-top: 1px solid #ccc; 120 | border-bottom: 1px solid #ccc; 121 | border-radius: 4px; 122 | } 123 | 124 | .summary-item { 125 | border-bottom: 1px solid #ccc; 126 | border-left: 1px solid #ccc; 127 | border-right: 1px solid #ccc; 128 | background-color: white; 129 | padding-top: 16px; 130 | padding-bottom: 16px; 131 | padding-left: 23px; 132 | padding-right: 8px; 133 | } 134 | 135 | .summary-item:last-child { 136 | border-bottom: 0px; 137 | border-bottom-left-radius: 3px; 138 | } 139 | 140 | .summary-item.selected.selectable { 141 | 
border-left: 3px solid #2377E9; 142 | padding-left: 21px; 143 | } 144 | 145 | .summary-item.selectable:not(.selected):hover { 146 | cursor: pointer; 147 | background-color: #FCFDFF; 148 | } 149 | 150 | .summary-item.selected.selectable .highlight:not(.annotation-hidden):hover { 151 | cursor: pointer; 152 | } 153 | 154 | .summary-item.selected.selectable .underline:not(.annotation-hidden):hover { 155 | cursor: pointer; 156 | } 157 | 158 | .summary-item .name { 159 | margin-bottom: 8px; 160 | font-weight: 400; 161 | } 162 | 163 | .summary-item.selected.selectable .name { 164 | font-weight: 500; 165 | } 166 | 167 | .inactive { 168 | opacity: 0.5 !important; 169 | } 170 | 171 | .stopword.grayed-out { 172 | opacity: 50% 173 | } 174 | 175 | .has-lexical-alignment .annotate-novel { 176 | /* Bold all non-underlined items */ 177 | font-weight: 500; 178 | color: black; 179 | } 180 | 181 | .summary-item .stopword { 182 | font-weight: 400; 183 | } 184 | 185 | .summary-item .token-underline { 186 | font-weight: 400; 187 | } 188 | 189 | .summary-item:not(.selected) .underline, .summary-item:not(.selectable) .underline { 190 | border-color: #909090 !important; 191 | } 192 | 193 | .underline.annotation-inactive { 194 | border-color: #E9E9E9 !important; 195 | } 196 | 197 | .underline.annotation-invisible { 198 | border-color: transparent !important; 199 | } 200 | 201 | .underline.annotation-hidden { 202 | border: 0px !important; 203 | margin: 0px !important; 204 | } 205 | 206 | .proxy-underline.annotation-hidden, .proxy-highlight.annotation-hidden { 207 | visibility: hidden; 208 | } 209 | 210 | .proxy-underline.annotation-inactive { 211 | background-color: #E9E9E9 !important; 212 | } 213 | 214 | .proxy-underline.annotation-invisible { 215 | background-color: transparent !important; 216 | } 217 | 218 | .highlight { 219 | display: inline-block; 220 | } 221 | 222 | .highlight.annotation-hidden { 223 | background: none !important; 224 | border-color: transparent !important; 225 | 
border-bottom: 0px !important; 226 | } 227 | 228 | .highlight.annotation-invisible { 229 | background-color: transparent !important; 230 | border-color: transparent !important; 231 | } 232 | 233 | .summary-item:not(.selected) .highlight:not(.annotation-hidden), 234 | .summary-item:not(.selectable) .highlight:not(.annotation-hidden) { 235 | border-color: #909090 !important; 236 | } 237 | 238 | .highlight.annotation-inactive { 239 | border-color: #E9E9E9 !important; 240 | } 241 | 242 | .display .proxy-scroll.hidden { 243 | visibility: hidden; 244 | } 245 | 246 | #document-header { 247 | min-height: 35px; 248 | margin-bottom: 0px; 249 | align-items: center; 250 | color: black; 251 | display: flex; 252 | } 253 | 254 | #summary-header { 255 | display: flex; 256 | justify-content: space-between; 257 | align-items: center; 258 | min-height: 35px; 259 | margin-bottom: 0px; 260 | color: black; 261 | } 262 | 263 | .horizontal-layout #summary-header { 264 | margin-top: 23px; 265 | } 266 | 267 | #summary-header-gap { 268 | flex: 1 0 15px; 269 | } 270 | 271 | .highlight.selected { 272 | border-color: transparent !important; 273 | } 274 | 275 | .highlight:not(.selected), .proxy-highlight:not(.selected) { 276 | background-color: transparent !important; 277 | } 278 | 279 | .summary-item.annotate-entities .entity:not(.matches-ngram) { 280 | color: #fb425c; 281 | font-weight: 500; 282 | } 283 | 284 | .summary-item.annotate-lexical .highlight.matches-ngram { 285 | padding: 0px; 286 | border-bottom: 0px !important; 287 | } 288 | 289 | .doc .highlight { 290 | padding: 0px; 291 | border: 0px !important; 292 | } 293 | 294 | ul.annotation-key { 295 | display: flex; 296 | align-items: flex-end; 297 | list-style: none; 298 | justify-content: flex-start; 299 | padding: 0px; 300 | margin: 0px 0px 10px 0px; 301 | } 302 | 303 | .annotation-key li { 304 | margin-right: 15px; 305 | font-size: 13px; 306 | padding: 6px 13px 6px 13px; 307 | } 308 | 309 | .annotation-key li.option { 310 | 
border-radius: 13px; 311 | cursor: pointer; 312 | border: 1px solid #F3F3F3; 313 | } 314 | 315 | .annotation-key li.option.selected { 316 | background-color: #F0F2F6; 317 | } 318 | 319 | .annotation-key-label { 320 | margin: 0px; 321 | padding-left: 0px !important; 322 | padding-right: 0px !important; 323 | } 324 | 325 | .annotation-key-ngram { 326 | border-bottom: 3px solid #66c2a5; 327 | padding-bottom: 1px; 328 | } 329 | 330 | .annotation-key-semantic { 331 | border-bottom: 4px dotted #66c2a5; 332 | padding-bottom: 1px; 333 | } 334 | 335 | .annotation-key-novel { 336 | font-weight: 500; 337 | color: black; 338 | } 339 | 340 | .annotation-key-entity { 341 | font-weight: 500; 342 | color: #fb425c; 343 | } 344 | 345 | .annotation-key-stopword { 346 | opacity: 70%; 347 | } 348 | -------------------------------------------------------------------------------- /resources/summvis.js: -------------------------------------------------------------------------------- 1 | $(document).ready( 2 | function () { 3 | 4 | // Define global variables 5 | 6 | let isDragging = false; 7 | let saveDragPos; 8 | 9 | let rtime; 10 | let timeout = false; 11 | let delta = 200; 12 | 13 | let disableScrollEvent = false; 14 | 15 | let annotateLexical = false; 16 | let annotateSemantic = false; 17 | let annotateNovel = false; 18 | let annotateEntities = false; 19 | 20 | // Define functions 21 | 22 | function clamp(number, min, max) { 23 | return Math.max(min, Math.min(number, max)); 24 | } 25 | 26 | function hasScroll() { 27 | const el = $(".display .main-doc"); 28 | return el.prop("scrollHeight") > el.prop("clientHeight"); 29 | } 30 | 31 | function scrollBy(delta) { 32 | const proxyDoc = $(".display .proxy-doc"); 33 | const proxyScroll = proxyDoc.find(".proxy-scroll"); 34 | const currentTop = parseFloat(proxyScroll.css("top")); 35 | const newTop = clamp(currentTop + delta, 0, proxyDoc.innerHeight() - proxyScroll.innerHeight()); 36 | proxyScroll.css("top", newTop); 37 | const mainDoc = 
$(".display .main-doc"); 38 | const scaleY = mainDoc[0].scrollHeight / proxyDoc.innerHeight(); 39 | mainDoc.scrollTop(newTop * scaleY) 40 | } 41 | 42 | function getSpanId(el) { 43 | return getSpanIds(el)[0] 44 | } 45 | 46 | function getSpanIds(el) { 47 | return el.attr("class").split(/\s+/).filter(function (x) { 48 | return x.startsWith("span-") 49 | }); 50 | } 51 | 52 | function createProxy() { 53 | const mainDoc = $(".display .main-doc"); 54 | const proxyDoc = $(".display .proxy-doc"); 55 | const proxyHeight = proxyDoc.innerHeight(); 56 | const proxyWidth = proxyDoc.innerWidth(); 57 | const scaleX = 0.8 * proxyWidth / mainDoc.innerWidth(); 58 | const scaleY = proxyHeight / mainDoc[0].scrollHeight; 59 | const scrollTop = mainDoc.scrollTop(); 60 | const proxyScrollTop = scrollTop * scaleY; 61 | const proxyScrollBottom = (scrollTop + mainDoc.innerHeight()) * scaleY; 62 | const proxyScrollHeight = proxyScrollBottom - proxyScrollTop; 63 | proxyDoc.empty(); 64 | 65 | // Loop through underlines in doc view and create associated proxy element 66 | if (annotateLexical) { 67 | $(".display .main-doc .token-underline").each( 68 | function (index, value) { 69 | const el = $(value); 70 | const x = el.position().left; 71 | const y = mainDoc.scrollTop() + el.position().top - mainDoc.position().top; 72 | const newHeight = 3; 73 | const color = el.css("border-bottom-color"); 74 | const proxyPadding = proxyDoc.innerWidth() - proxyDoc.width(); 75 | const newX = x * scaleX + proxyPadding / 2; 76 | const newY = (y + el.height()) * scaleY - newHeight; 77 | const newWidth = Math.min( 78 | Math.max((el.width() * scaleX) + 1, 5), 79 | proxyDoc.width() + proxyPadding / 2 - newX 80 | ); 81 | 82 | let classes = "proxy-underline annotation-hidden " + getSpanIds(el).join(" "); 83 | const proxyEl = $('
', { 84 | "class": classes, 85 | "css": { 86 | "position": "absolute", 87 | "left": Math.round(newX), 88 | "top": Math.round(newY), 89 | "background-color": color, 90 | "width": newWidth, 91 | "height": newHeight, 92 | } 93 | }).appendTo(proxyDoc); 94 | proxyEl.data(el.data()); 95 | } 96 | ); 97 | } 98 | 99 | // Loop through all active highlights in doc view and create associated proxy element 100 | if (annotateSemantic) { 101 | $(".display .main-doc .highlight").each( 102 | function (index, value) { 103 | const el = $(value); 104 | const x = el.position().left; 105 | const y = mainDoc.scrollTop() + el.position().top - mainDoc.position().top; 106 | const newHeight = 5; 107 | const color = el.css("background-color"); 108 | const proxyPadding = proxyDoc.innerWidth() - proxyDoc.width() 109 | const newX = x * scaleX + proxyPadding / 2; 110 | const newY = (y + el.height()) * scaleY - newHeight; 111 | const newWidth = Math.min( 112 | Math.max((el.width() * scaleX) + 1, 5), 113 | proxyDoc.width() + proxyPadding / 2 - newX 114 | ); 115 | const proxyEl = $('
', { 116 | "class": 'proxy-highlight annotation-hidden', 117 | "css": { 118 | "position": "absolute", 119 | "left": Math.round(newX), 120 | "top": Math.round(newY), 121 | "background-color": color, 122 | "width": newWidth, 123 | "height": newHeight, 124 | } 125 | }).appendTo(proxyDoc); 126 | // Copy data attributes 127 | proxyEl.data(el.data()); 128 | // Set classes for matching 129 | proxyEl.addClass(el.data("match-classes")) 130 | } 131 | ); 132 | } 133 | $('
', { 134 | "class": 'proxy-scroll hidden', 135 | "css": { 136 | "top": proxyScrollTop, 137 | "height": proxyScrollHeight, 138 | } 139 | }).appendTo(proxyDoc); 140 | if (hasScroll()) { 141 | $(".display .proxy-scroll").removeClass("hidden") 142 | } 143 | 144 | $(".display .proxy-doc") 145 | .mousedown(function (event) { 146 | saveDragPos = parseFloat(event.pageY); 147 | isDragging = true; 148 | event.preventDefault(); 149 | }) 150 | .mousemove(function (event) { 151 | const dragPos = parseFloat(event.pageY); 152 | if (isDragging) { 153 | const distanceMoved = dragPos - saveDragPos; 154 | scrollBy(distanceMoved); 155 | saveDragPos = dragPos; 156 | event.preventDefault(); 157 | } 158 | }) 159 | .mouseup(function (event) { 160 | isDragging = false; 161 | }) 162 | .mouseenter(function () { 163 | disableScrollEvent = true; 164 | $(".display .proxy-scroll").addClass("hover") 165 | }) 166 | .mouseleave(function () { 167 | isDragging = false; 168 | disableScrollEvent = false; 169 | $(".display .proxy-scroll").removeClass("hover") 170 | }) 171 | .on('wheel', function (event) { 172 | scrollBy(event.originalEvent.deltaY / 4); 173 | event.preventDefault(); 174 | }); 175 | 176 | // TODO: Handle user clicking in scroll region 177 | 178 | $(".display .main-doc").scroll(function () { 179 | if (disableScrollEvent) return; 180 | $(".display .proxy-scroll") 181 | .css( 182 | "top", $(this).scrollTop() * scaleY 183 | ) 184 | }) 185 | } 186 | 187 | function resizeend() { 188 | if (new Date() - rtime < delta) { 189 | setTimeout(resizeend, delta); 190 | } else { 191 | timeout = false; 192 | updateAnnotations(); 193 | toggleScrollbar(); 194 | } 195 | } 196 | 197 | function toggleScrollbar() { 198 | if (hasScroll()) { 199 | $(".display .proxy-scroll").removeClass("hidden"); 200 | } else { 201 | $(".display .proxy-scroll").addClass("hidden"); 202 | } 203 | } 204 | 205 | function updateAnnotations() { 206 | 207 | annotateSemantic = $("#option-semantic").hasClass("selected"); 208 | 
annotateLexical = $("#option-lexical").hasClass("selected"); 209 | annotateEntities = $("#option-entity").hasClass("selected"); 210 | annotateNovel = $("#option-novel").hasClass("selected"); 211 | 212 | if (annotateSemantic || annotateLexical) { 213 | $(".summary-item").addClass("selectable") 214 | } else { 215 | $(".summary-item").removeClass("selectable") 216 | } 217 | 218 | if (annotateLexical) { 219 | $(".underline").removeClass("annotation-hidden"); 220 | $(".summary-item").addClass("annotate-lexical"); 221 | } else { 222 | $(".underline").addClass("annotation-hidden"); 223 | $(".summary-item").removeClass("annotate-lexical"); 224 | } 225 | if (annotateSemantic) { 226 | $(".highlight").removeClass("annotation-hidden"); 227 | } else { 228 | $(".highlight").addClass("annotation-hidden"); 229 | } 230 | if (annotateEntities) { 231 | $(".summary-item").addClass("annotate-entities") 232 | } else { 233 | $(".summary-item").removeClass("annotate-entities") 234 | } 235 | if (annotateNovel) { 236 | $(".summary-item").addClass("annotate-novel") 237 | } else { 238 | $(".summary-item").removeClass("annotate-novel") 239 | } 240 | 241 | createProxy(); 242 | 243 | if (annotateLexical) { 244 | $(".proxy-underline").removeClass("annotation-hidden"); 245 | } else { 246 | $(".proxy-underline").addClass("annotation-hidden"); 247 | } 248 | if (annotateSemantic) { 249 | $(".proxy-highlight").removeClass("annotation-hidden"); 250 | } else { 251 | $(".proxy-highlight").addClass("annotation-hidden"); 252 | } 253 | 254 | $(".summary-item .highlight").tooltip("disable"); 255 | if (annotateSemantic) { 256 | $(".summary-item.selected .highlight").tooltip("enable") 257 | } 258 | } 259 | 260 | function removeDocTooltips() { 261 | $("[data-tooltip-timestamp]").tooltip("dispose").removeAttr("data-tooltip-timestamp"); 262 | } 263 | 264 | function resetUnderlines() { 265 | $('.annotation-invisible').removeClass("annotation-invisible"); 266 | 
$('.annotation-inactive').removeClass("annotation-inactive"); 267 | $('.temp-underline-color') 268 | .each(function () { 269 | $(this).css("border-color", $(this).data("primary-color")); 270 | }) 271 | .removeClass("temp-underline-color") 272 | $('.temp-proxy-underline-color') 273 | .each(function () { 274 | $(this).css("background-color", $(this).data("primary-color")); 275 | }) 276 | .removeClass("temp-proxy-underline-color") 277 | } 278 | 279 | function showDocTooltip(el) { 280 | const topDocHighlightId = $(el).data("top-doc-highlight-id"); 281 | const topDocSim = $(el).data("top-doc-sim"); 282 | const topHighlight = $(`.display .main-doc .highlight[data-highlight-id=${topDocHighlightId}]`); 283 | if (!isViewable(topHighlight)) { 284 | return; 285 | } 286 | topHighlight.tooltip({title: `Most similar (${topDocSim})`, trigger: "manual", container: "body"}); 287 | topHighlight.tooltip("show"); 288 | const tooltipTimestamp = Date.now(); 289 | // Do not use .data() method to set data attributes as they are not searchable 290 | topHighlight.attr("data-tooltip-timestamp", tooltipTimestamp); 291 | setTimeout(function () { 292 | if (topHighlight.data("tooltip-timestamp") == tooltipTimestamp) { 293 | topHighlight.tooltip("dispose").removeAttr("data-tooltip-timestamp"); 294 | } 295 | }, 8000); 296 | } 297 | 298 | function highlightUnderlines() { 299 | const spanId = getSpanId($(this)); 300 | const color = $(this).css("border-bottom-color"); 301 | // TODO Consolidate into single statement 302 | $(`.summary-item.selected .underline.${spanId}`).removeClass("annotation-inactive"); 303 | $(`.doc .underline.${spanId}`) 304 | .removeClass("annotation-inactive") 305 | .each(function () { 306 | $(this).css("border-bottom-color", color); 307 | }) 308 | .addClass("temp-underline-color"); 309 | $(`.proxy-underline.${spanId}`) 310 | .removeClass("annotation-inactive") 311 | .each(function () { 312 | $(this).css("background-color", color); 313 | }) 314 | 
        .addClass("temp-proxy-underline-color");

    // Dim every underline that does not belong to the hovered span.
    $(`.summary-item.selected .underline:not(.${spanId})`).addClass("annotation-inactive");
    $(`.doc .underline:not(.${spanId})`).addClass("annotation-inactive");
    $(`.proxy-underline:not(.${spanId})`).addClass("annotation-inactive");

    $(".summary-item.selected .highlight:not(.annotation-hidden)").addClass("annotation-inactive");
}

// Undo all highlight emphasis (colors, selection, tooltips, titles).
function resetHighlights() {
    removeDocTooltips();
    $('.summary-item.selected .annotation-inactive').removeClass("annotation-inactive");
    $('.summary-item.selected .annotation-invisible').removeClass("annotation-invisible");
    $('.temp-highlight-color')
        .each(function () {
            // Primary color was stashed in data-primary-color when the temp color was applied.
            $(this).css("background-color", $(this).data("primary-color"));
        })
        .removeClass("temp-highlight-color");
    $('.highlight.selected').removeClass("selected");
    $('.proxy-highlight.selected').removeClass("selected");
    $('.summary-item [title]').removeAttr("title");
}

// Select a summary highlight and color its matching doc/proxy highlights.
// Called with `this` bound to the hovered/clicked highlight element.
function highlightToken() {
    const highlightId = $(this).data("highlight-id");
    $(`.summary-item.selected .highlight:not(.summary-highlight-${highlightId})`).addClass("annotation-inactive");
    $('.highlight.selected').removeClass("selected")
    $('.proxy-highlight.selected').removeClass("selected")
    const matchedDocHighlight = `.display .main-doc .summary-highlight-${highlightId}`;
    const matchedProxyHighlight = `.proxy-doc .summary-highlight-${highlightId}`;
    $(matchedDocHighlight + ", " + matchedProxyHighlight)
        .each(function () {
            // Each doc highlight stores a per-summary-token color in data-color-<id>.
            const newHighlightColor = $(this).data(`color-${highlightId}`);
            $(this).css("background-color", newHighlightColor);
            $(this).addClass("selected");
        })
        .addClass("temp-highlight-color");
    $(".underline").addClass("annotation-inactive");
    $(".proxy-underline").addClass("annotation-invisible")
    showDocTooltip(this);
    $(this).addClass("selected");
    $(this).removeClass("annotation-inactive");
    $('.summary-item [title]').removeAttr("title");
    if (!isViewable($(matchedDocHighlight))) {
        // Best match is off-screen; hint that clicking scrolls to it.
        $(this).attr("title", "Click to scroll to most similar word.")
    }
}

// True when `el` lies fully inside the main document's scroll viewport.
function isViewable(el) {
    const elTop = el.offset().top;
    const elBottom = elTop + el.outerHeight();
    const scrollRegion = $(".display .main-doc");
    const scrollTop = scrollRegion.offset().top;
    const scrollBottom = scrollTop + scrollRegion.outerHeight();
    return elTop > scrollTop && elBottom < scrollBottom;
}

// Initialization

$(function () {
    $('[data-toggle="tooltip"]').tooltip({
        // 'boundary': '.summary-container'
        trigger: 'hover'
    })
})
updateAnnotations();

// Bind events

// NOTE(review): rtime / timeout / resizeend / delta are defined earlier in
// this file, outside the visible chunk — debounced-resize state, presumably.
$(window).resize(function () {
    rtime = new Date();
    if (timeout === false) {
        timeout = true;
        setTimeout(resizeend, delta);
    }
});

// Selecting a summary swaps in the doc version aligned with that summary.
$(".summary-list").on(
    "click",
    ".summary-item.selectable:not(.selected)",
    function () {
        const summary_index = $(this).data("index");

        // Update summary items
        $(".summary-item.selected").removeClass("selected")
        $(this).addClass("selected")

        // Update doc
        // Show the version of document aligned with selected summary index
        $(`.doc[data-index=${summary_index}]`).removeClass("nodisplay").addClass("display");
        // Hide the version of document not aligned with selected summary index
        $(`.doc[data-index!=${summary_index}]`).removeClass("display").addClass("nodisplay");

        updateAnnotations();
    }
);

// Toolbar toggles: flip state, then re-derive all annotation classes.
$("#option-lexical").click(function () {
    $(this).toggleClass("selected")
    updateAnnotations()
});
$("#option-semantic").click(function () {
    $(this).toggleClass("selected")
    updateAnnotations()
});
$("#option-novel").click(function () {
    $(this).toggleClass("selected")
    updateAnnotations()
});
$("#option-entity").click(function () {
    $(this).toggleClass("selected")
    updateAnnotations()
});

const activeUnderlines = ".summary-item.selected .underline:not(.annotation-inactive):not(.annotation-hidden)";
$(".summary-list").on(
    "mouseenter",
    activeUnderlines,
    function () {
        highlightUnderlines.call(this);
    }
);

$(".summary-list").on(
    "mouseleave",
    activeUnderlines,
    resetUnderlines
);
$(".summary-list").on(
    "click",
    activeUnderlines,
    function () {
        // Find aligned underline in doc and scroll doc to that position
        highlightUnderlines.call(this);
        const mainDoc = $(".display .main-doc");
        const spanId = getSpanId($(this));
        const matchedUnderline = $(`.doc .underline.${spanId}`);
        mainDoc.animate({
            scrollTop: mainDoc.scrollTop() +
                matchedUnderline.offset().top - mainDoc.offset().top - 60
        },
            300
        )
    }
);

// A highlight is hover-active unless it also matches an n-gram while lexical
// annotation is on (the underline takes precedence in that case).
const activeHighlights = ".summary-item.selected .highlight:not(.annotation-hidden):not(.matches-ngram), " +
    ".summary-item.selected:not(.annotate-lexical) .highlight:not(.annotation-hidden)";
$(".summary-list").on(
    "mouseenter",
    activeHighlights,
    function () {
        highlightToken.call(this);
    })
$(".summary-list").on(
    "mouseleave",
    activeHighlights,
    function () {
        resetHighlights();
        resetUnderlines();
    }
);
$(".summary-list").on(
    "click",
    activeHighlights,
    function () {
        highlightToken.call(this);
        // Find corresponding highlight in doc representing max similarity and scroll doc to that position
        const topDocHighlightId = $(this).data("top-doc-highlight-id");
        removeDocTooltips(topDocHighlightId);
const topDocHighlight = $(`.display .main-doc .highlight[data-highlight-id=${topDocHighlightId}]`); 485 | const mainDoc = $(".display .main-doc"); 486 | const el = this; 487 | mainDoc.animate({ 488 | scrollTop: mainDoc.scrollTop() + 489 | topDocHighlight.offset().top - mainDoc.offset().top - 60 490 | }, 491 | 300, 492 | function () { 493 | setTimeout( 494 | function () { 495 | // If no other tooltips have since been displayed 496 | if ($("[data-tooltip-timestamp]").length == 0) { 497 | showDocTooltip(el); 498 | } else { 499 | console.log("Not showing tooltip because one already exists") 500 | } 501 | }, 502 | 100 503 | ) 504 | } 505 | ) 506 | } 507 | ); 508 | $(".summary-list").on( 509 | "mouseleave", 510 | ".summary-item.selected .content", 511 | function () { 512 | resetHighlights(); 513 | resetUnderlines(); 514 | }, 515 | ); 516 | } 517 | ); 518 | 519 | -------------------------------------------------------------------------------- /summvis.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import operator 4 | import os 5 | import re 6 | from pathlib import Path 7 | 8 | import spacy 9 | import spacy.lang.en 10 | import streamlit as st 11 | from meerkat import DataPanel 12 | from spacy.tokens import Doc 13 | 14 | from align import NGramAligner, BertscoreAligner, StaticEmbeddingAligner 15 | from components import MainView 16 | from utils import clean_text 17 | 18 | MIN_SEMANTIC_SIM_THRESHOLD = 0.1 19 | MAX_SEMANTIC_SIM_TOP_K = 10 20 | 21 | Doc.set_extension("name", default=None, force=True) 22 | Doc.set_extension("column", default=None, force=True) 23 | 24 | 25 | class Instance(): 26 | def __init__(self, id_, document, reference, preds, data=None): 27 | self.id = id_ 28 | self.document = document 29 | self.reference = reference 30 | self.preds = preds 31 | self.data = data 32 | 33 | 34 | @st.cache(allow_output_mutation=True) 35 | def load_from_index(filename, index): 36 | with 
open(filename) as f: 37 | for i, line in enumerate(f): 38 | if i == index: 39 | return json.loads(line.strip()) 40 | 41 | 42 | def _nlp_key(x: spacy.Language): 43 | return str(x.path) 44 | 45 | 46 | @st.cache(allow_output_mutation=True, hash_funcs={spacy.lang.en.English: _nlp_key}) 47 | def load_dataset(path: str, nlp: spacy.Language): 48 | if path.endswith('.jsonl'): 49 | return DataPanel.from_jsonl(path) 50 | try: 51 | return DataPanel.read(path, nlp=nlp) 52 | except NotADirectoryError: 53 | return DataPanel.from_jsonl(path) 54 | 55 | 56 | @st.cache(allow_output_mutation=True) 57 | def get_nlp(): 58 | try: 59 | nlp = spacy.load("en_core_web_lg") 60 | except: 61 | nlp = spacy.load("en_core_web_sm") 62 | is_lg = False 63 | else: 64 | is_lg = True 65 | nlp.add_pipe('sentencizer', before="parser") 66 | return nlp, is_lg 67 | 68 | 69 | def retrieve(dataset, index, filename=None): 70 | if index >= len(dataset): 71 | st.error(f"Index {index} exceeds dataset length.") 72 | 73 | eval_dataset = None 74 | if filename: 75 | # TODO Handle this through dedicated fields 76 | if "cnn_dailymail" in filename: 77 | eval_dataset = "cnndm" 78 | elif "xsum" in filename: 79 | eval_dataset = "xsum" 80 | 81 | data = dataset[index] 82 | id_ = data.get('id', '') 83 | 84 | try: 85 | document = data['spacy:document'] 86 | except KeyError: 87 | if not is_lg: 88 | st.error("'en_core_web_lg model' is required unless loading from cached file." 89 | "To install: 'python -m spacy download en_core_web_lg'") 90 | try: 91 | text = data['document'] 92 | except KeyError: 93 | text = data['article'] 94 | if not text: 95 | st.error("Document is blank") 96 | return 97 | document = nlp(text if args.no_clean else clean_text(text)) 98 | document._.name = "Document" 99 | document._.column = "document" 100 | 101 | try: 102 | reference = data['spacy:summary:reference'] 103 | 104 | except KeyError: 105 | if not is_lg: 106 | st.error("'en_core_web_lg model' is required unless loading from cached file." 
107 | "To install: 'python -m spacy download en_core_web_lg'") 108 | try: 109 | text = data['summary'] if 'summary' in data else data['summary:reference'] 110 | except KeyError: 111 | text = data.get('highlights') 112 | if text: 113 | reference = nlp(text if args.no_clean else clean_text(text)) 114 | else: 115 | reference = None 116 | if reference is not None: 117 | reference._.name = "Reference" 118 | reference._.column = "summary:reference" 119 | 120 | model_names = set() 121 | for k in data: 122 | m = re.match('(preprocessed_)?summary:(?P.*)', k) 123 | if m: 124 | model_name = m.group('model') 125 | if model_name != 'reference': 126 | model_names.add(model_name) 127 | 128 | preds = [] 129 | for model_name in model_names: 130 | try: 131 | pred = data[f"spacy:summary:{model_name}"] 132 | except KeyError: 133 | if not is_lg: 134 | st.error("'en_core_web_lg model' is required unless loading from cached file." 135 | "To install: 'python -m spacy download en_core_web_lg'") 136 | text = data[f"summary:{model_name}"] 137 | pred = nlp(text if args.no_clean else clean_text(text)) 138 | 139 | parts = model_name.split("-") 140 | primary_sort = 0 141 | if len(parts) == 2: 142 | model, train_dataset = parts 143 | if train_dataset == eval_dataset: 144 | formatted_model_name = model.upper() 145 | else: 146 | formatted_model_name = f"{model.upper()} ({train_dataset.upper()}-trained)" 147 | if train_dataset in ["xsum", "cnndm"]: 148 | primary_sort = 1 149 | else: 150 | primary_sort = 2 151 | else: 152 | formatted_model_name = model_name.upper() 153 | pred._.name = formatted_model_name 154 | pred._.column = f"summary:{model_name}" 155 | preds.append( 156 | ((primary_sort, formatted_model_name), pred) 157 | ) 158 | 159 | preds = [pred for _, pred in sorted(preds)] 160 | 161 | return Instance( 162 | id_=id_, 163 | document=document, 164 | reference=reference, 165 | preds=preds, 166 | data=data, 167 | ) 168 | 169 | 170 | def filter_alignment(alignment, threshold, top_k): 171 | 
filtered_alignment = {} 172 | for k, v in alignment.items(): 173 | filtered_matches = [(match_idx, score) for match_idx, score in v if score >= threshold] 174 | if filtered_matches: 175 | filtered_alignment[k] = sorted(filtered_matches, key=operator.itemgetter(1), reverse=True)[:top_k] 176 | return filtered_alignment 177 | 178 | 179 | def select_comparison(example): 180 | all_summaries = [] 181 | 182 | if example.reference: 183 | all_summaries.append(example.reference) 184 | if example.preds: 185 | all_summaries.extend(example.preds) 186 | 187 | from_documents = [example.document] 188 | if example.reference: 189 | from_documents.append(example.reference) 190 | document_names = [document._.name for document in from_documents] 191 | select_document_name = sidebar_placeholder_from.selectbox( 192 | label="Comparison FROM:", 193 | options=document_names 194 | ) 195 | document_index = document_names.index(select_document_name) 196 | selected_document = from_documents[document_index] 197 | 198 | remaining_summaries = [summary for summary in all_summaries if 199 | summary._.name != selected_document._.name] 200 | remaining_summary_names = [summary._.name for summary in remaining_summaries] 201 | 202 | selected_summary_names = sidebar_placeholder_to.multiselect( 203 | 'Comparison TO:', 204 | remaining_summary_names, 205 | remaining_summary_names 206 | ) 207 | selected_summaries = [] 208 | for summary_name in selected_summary_names: 209 | summary_index = remaining_summary_names.index(summary_name) 210 | selected_summaries.append(remaining_summaries[summary_index]) 211 | return selected_document, selected_summaries 212 | 213 | 214 | def show_main(example): 215 | # Get user input 216 | 217 | semantic_sim_type = st.sidebar.radio( 218 | "Semantic similarity type:", 219 | ["Contextual embedding", "Static embedding"] 220 | ) 221 | semantic_sim_threshold = st.sidebar.slider( 222 | "Semantic similarity threshold:", 223 | min_value=MIN_SEMANTIC_SIM_THRESHOLD, 224 | max_value=1.0, 225 
| step=0.1, 226 | value=0.2, 227 | ) 228 | semantic_sim_top_k = st.sidebar.slider( 229 | "Semantic similarity top-k:", 230 | min_value=1, 231 | max_value=MAX_SEMANTIC_SIM_TOP_K, 232 | step=1, 233 | value=10, 234 | ) 235 | 236 | document, summaries = select_comparison(example) 237 | layout = st.sidebar.radio("Layout:", ["Vertical", "Horizontal"]).lower() 238 | scroll = True 239 | gray_out_stopwords = st.sidebar.checkbox(label="Gray out stopwords", value=True) 240 | 241 | # Gather data 242 | try: 243 | lexical_alignments = [ 244 | example.data[f'{NGramAligner.__name__}:spacy:{document._.column}:spacy:{summary._.column}'] 245 | for summary in summaries 246 | ] 247 | except KeyError: 248 | lexical_alignments = NGramAligner().align(document, summaries) 249 | 250 | if semantic_sim_type == "Static embedding": 251 | try: 252 | semantic_alignments = [ 253 | example.data[f'{StaticEmbeddingAligner.__name__}:spacy:{document._.column}:spacy:{summary._.column}'] 254 | for summary in summaries 255 | ] 256 | except KeyError: 257 | semantic_alignments = StaticEmbeddingAligner( 258 | semantic_sim_threshold, 259 | semantic_sim_top_k).align( 260 | document, 261 | summaries 262 | ) 263 | else: 264 | try: 265 | semantic_alignments = [ 266 | example.data[f'{BertscoreAligner.__name__}:spacy:{document._.column}:spacy:{summary._.column}'] 267 | for summary in summaries 268 | ] 269 | except KeyError: 270 | semantic_alignments = BertscoreAligner(semantic_sim_threshold, 271 | semantic_sim_top_k).align(document, 272 | summaries) 273 | 274 | MainView( 275 | document, 276 | summaries, 277 | semantic_alignments, 278 | lexical_alignments, 279 | layout, 280 | scroll, 281 | gray_out_stopwords, 282 | ).show(height=720) 283 | 284 | 285 | if __name__ == "__main__": 286 | 287 | st.set_page_config(layout="wide") 288 | 289 | parser = argparse.ArgumentParser() 290 | parser.add_argument('--path', type=str, default='data') 291 | parser.add_argument('--no_clean', action='store_true', default=False, 292 | 
help="Do not clean text (remove extraneous spaces, newlines).") 293 | args = parser.parse_args() 294 | 295 | nlp, is_lg = get_nlp() 296 | 297 | path = Path(args.path) 298 | path_dir = path.parent 299 | all_files = set(map(os.path.basename, path_dir.glob('*'))) 300 | files = sorted([ 301 | fname for fname in all_files if not (fname.endswith(".py") or fname.startswith(".")) 302 | ]) 303 | if path.is_file: 304 | try: 305 | file_index = files.index(path.name) 306 | except: 307 | raise FileNotFoundError(f"File not found: {path.name}") 308 | else: 309 | file_index = 0 310 | col1, col2 = st.beta_columns((3, 1)) 311 | filename = col1.selectbox(label="File:", options=files, index=file_index) 312 | dataset = load_dataset(str(path_dir / filename), nlp=nlp) 313 | 314 | dataset_size = len(dataset) 315 | query = col2.number_input(f"Index (Size: {dataset_size}):", value=0, min_value=0, max_value=dataset_size - 1) 316 | 317 | sidebar_placeholder_from = st.sidebar.empty() 318 | sidebar_placeholder_to = st.sidebar.empty() 319 | 320 | if query is not None: 321 | example = retrieve(dataset, query, filename) 322 | if example: 323 | show_main(example) 324 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def clean_text(text): 5 | split_punct = re.escape(r'()') 6 | return ' '.join(re.findall(rf"[^\s{split_punct}]+|[{split_punct}]", text)) 7 | # Ensure parentheses are probably separated by spaCy tokenizer for CNN/DailyMail dataset. 
8 | return text.replace("(", "( ").replace(")", ") ") 9 | 10 | -------------------------------------------------------------------------------- /website/annotations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/website/annotations.png -------------------------------------------------------------------------------- /website/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/website/demo.gif -------------------------------------------------------------------------------- /website/main-vis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/website/main-vis.jpg -------------------------------------------------------------------------------- /website/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/website/title.png -------------------------------------------------------------------------------- /website/triangle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robustness-gym/summvis/65385cfabd7ceb62d2b58a0f66828b80038b80c8/website/triangle.png --------------------------------------------------------------------------------