├── .gitignore
├── LICENSE
├── README.md
├── align.py
├── components.py
├── examples
├── wikinews
│ ├── README.md
│ ├── load.sh
│ ├── wikinews.cache
│ │ ├── meta.yaml
│ │ ├── mgr
│ │ │ ├── blocks
│ │ │ │ └── 140645734304016
│ │ │ │ │ ├── data.feather
│ │ │ │ │ └── meta.yaml
│ │ │ ├── columns
│ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:bart-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:bart-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:pegasus-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:pegasus-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── BertscoreAligner:spacy:document:spacy:summary:reference
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── BertscoreAligner:spacy:summary:reference:spacy:summary:bart-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── BertscoreAligner:spacy:summary:reference:spacy:summary:bart-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── BertscoreAligner:spacy:summary:reference:spacy:summary:pegasus-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:bart-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:bart-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:pegasus-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:pegasus-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:document:spacy:summary:reference
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:summary:reference:spacy:summary:bart-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:summary:reference:spacy:summary:bart-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── NGramAligner:spacy:summary:reference:spacy:summary:pegasus-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:bart-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:bart-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:pegasus-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:document:spacy:summary:reference
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:bart-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-cnndm
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── StaticEmbeddingAligner:spacy:summary:reference:spacy:summary:pegasus-xsum
│ │ │ │ │ ├── data.dill
│ │ │ │ │ ├── meta.yaml
│ │ │ │ │ └── state.dill
│ │ │ │ ├── document
│ │ │ │ │ └── state.dill
│ │ │ │ ├── preprocessed_document
│ │ │ │ │ └── state.dill
│ │ │ │ ├── preprocessed_summary:bart-cnndm
│ │ │ │ │ └── state.dill
│ │ │ │ ├── preprocessed_summary:bart-xsum
│ │ │ │ │ └── state.dill
│ │ │ │ ├── preprocessed_summary:pegasus-cnndm
│ │ │ │ │ └── state.dill
│ │ │ │ ├── preprocessed_summary:pegasus-xsum
│ │ │ │ │ └── state.dill
│ │ │ │ ├── preprocessed_summary:reference
│ │ │ │ │ └── state.dill
│ │ │ │ ├── spacy:document
│ │ │ │ │ ├── data.spacy
│ │ │ │ │ └── meta.yaml
│ │ │ │ ├── spacy:summary:bart-cnndm
│ │ │ │ │ ├── data.spacy
│ │ │ │ │ └── meta.yaml
│ │ │ │ ├── spacy:summary:bart-xsum
│ │ │ │ │ ├── data.spacy
│ │ │ │ │ └── meta.yaml
│ │ │ │ ├── spacy:summary:pegasus-cnndm
│ │ │ │ │ ├── data.spacy
│ │ │ │ │ └── meta.yaml
│ │ │ │ ├── spacy:summary:pegasus-xsum
│ │ │ │ │ ├── data.spacy
│ │ │ │ │ └── meta.yaml
│ │ │ │ ├── spacy:summary:reference
│ │ │ │ │ ├── data.spacy
│ │ │ │ │ └── meta.yaml
│ │ │ │ ├── summary:bart-cnndm
│ │ │ │ │ └── state.dill
│ │ │ │ ├── summary:bart-xsum
│ │ │ │ │ └── state.dill
│ │ │ │ ├── summary:pegasus-cnndm
│ │ │ │ │ └── state.dill
│ │ │ │ ├── summary:pegasus-xsum
│ │ │ │ │ └── state.dill
│ │ │ │ └── summary:reference
│ │ │ │ │ └── state.dill
│ │ │ └── meta.yaml
│ │ └── state.dill
│ └── wikinews.jsonl
└── xsum
│ └── load.sh
├── generation.py
├── join.py
├── preprocessing.py
├── quickstart.sh
├── requirements.txt
├── resources
├── jquery.color-2.1.2.min.js
├── summvis.css
└── summvis.js
├── summvis.py
├── utils.py
└── website
├── annotations.png
├── demo.gif
├── main-vis.jpg
├── title.png
└── triangle.png
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_STORE
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2021 SummVis
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SummVis
2 |
3 | SummVis is an open-source visualization tool that supports fine-grained analysis of summarization models, data, and evaluation
4 | metrics. Through its lexical and semantic visualizations, SummVis enables in-depth exploration across important dimensions such as factual consistency and abstractiveness.
5 |
6 | Authors: [Jesse Vig](https://twitter.com/jesse_vig)<sup>1</sup>,
7 | [Wojciech Kryściński](https://twitter.com/iam_wkr)<sup>1</sup>,
8 | [Karan Goel](https://twitter.com/krandiash)<sup>2</sup>,
9 | [Nazneen Fatema Rajani](https://twitter.com/nazneenrajani)<sup>1</sup>
10 | <sup>1</sup>[Salesforce Research](https://einstein.ai/) <sup>2</sup>[Stanford Hazy Research](https://hazyresearch.stanford.edu/)
11 |
12 | 📖 [Paper](https://arxiv.org/abs/2104.07605)
13 | 🎥 [Demo](https://vimeo.com/540429745)
14 |
15 |
16 |
17 |
18 |
19 | _We welcome issues for questions, suggestions, requests or bug reports._
20 |
21 | ## Table of Contents
22 | - [User guide](#user-guide)
23 | - [Installation](#installation)
24 | - [Quickstart](#quickstart)
25 | - [Load data into SummVis](#loading-data-into-summvis)
26 | - [Deploying SummVis remotely](#deploying-summvis-remotely)
27 | - [Citation](#citation)
28 | - [Acknowledgements](#acknowledgements)
29 |
30 | ## User guide
31 |
32 | ### Overview
33 | SummVis is a tool for analyzing abstractive summarization systems. It provides fine-grained insights on summarization
34 | models, data, and evaluation metrics by visualizing the relationships between source documents, reference summaries,
35 | and generated summaries, as illustrated in the figure below.
36 |
37 | 
38 |
39 | ### Interface
40 |
41 | The SummVis interface is shown below. The example displayed is the first record from the
42 | [CNN / Daily Mail](https://huggingface.co/datasets/cnn_dailymail) validation set.
43 |
44 | 
45 |
46 |
47 | #### Components
48 |
49 | **(a)** Configuration panel
50 | **(b)** Source document (or reference summary, depending on configuration)
51 | **(c)** Generated summaries (and/or reference summary, depending on configuration)
52 | **(d)** Scroll bar with global view of annotations
53 |
54 | #### Annotations
55 |
56 |
57 | **N-gram overlap:** Word sequences that overlap between the document on the left and
58 | the selected summary on the right. Underlines are color-coded by index of summary sentence.
59 | **Semantic overlap**: Words in the summary that are semantically close to one or more words in document on the left.
60 | **Novel words**: Words in the summary that do not appear in the document on the left.
61 | **Novel entities**: Entity words in the summary that do not appear in the document on the left.
62 |
63 | ### Limitations
64 | Currently only English text is supported. Extremely long documents may render slowly in the tool.
65 |
66 | ## Installation
67 | ```shell
68 | git clone https://github.com/robustness-gym/summvis.git
69 | cd summvis
70 | # Following line necessary to get pip > 21.3
71 | pip install --upgrade pip
72 | pip install -r requirements.txt
73 | ```
74 |
75 | ## Quickstart
76 |
77 | View an example from [WikiNews](examples/wikinews/README.md):
78 |
79 | ```shell
80 | streamlit run summvis.py -- --path examples/wikinews/wikinews.cache
81 | ```
82 |
83 |
84 | ## Loading data into SummVis
85 |
86 | ### If you have generated summaries:
87 |
88 | The following steps describe how to load source documents and associated precomputed summaries into the SummVis tool.
89 |
90 | **1. Download spaCy model**
91 | ```
92 | python -m spacy download en_core_web_lg
93 | ```
94 | This may take several minutes.
95 |
96 | **2. Create .jsonl file with the source document, reference summary and/or generated summaries in the following format:**
97 |
98 | ```
99 | {"document": "This is the first source document", "summary:reference": "This is the reference summary", "summary:testmodel1": "This is the summary for testmodel1", "summary:testmodel2": "This is the summary for testmodel2"}
100 | {"document": "This is the second source document", "summary:reference": "This is the reference summary", "summary:testmodel1": "This is the summary for testmodel1", "summary:testmodel2": "This is the summary for testmodel2"}
101 | ```
102 |
103 | The key for the reference summary must equal `summary:reference` and the key for any other summary must be of the form
104 | `summary:<model_name>`, e.g. `summary:BART`. The document and at least one summary (reference, other, or both) are required.
105 |
106 | We also provide [scripts to generate summaries](#if-you-do-not-have-generated-summaries) if you haven't done so already.
107 |
108 | **3. Preprocess .jsonl file**
109 |
110 | Run `preprocessing.py` to precompute all data required in the interface (running `spaCy`, lexical and semantic
111 | aligners) and save a cache file, which can be read directly into the tool. Note that this script may take some time to run
112 | (~5-15 seconds per example on a MacBook Pro for
113 | documents of typical length found in CNN/DailyMail or XSum), so you may want to start with a small subset of your dataset
114 | using the `--n_samples` argument (below). This will also be expedited by running on a GPU.
115 |
116 | ```shell
117 | python preprocessing.py \
118 | --workflow \
119 | --dataset_jsonl path/to/my_dataset.jsonl \
120 | --processed_dataset_path path/to/my_cache_file
121 | ```
122 |
123 | Additional options:
124 | `--n_samples <number_of_samples>`: Process the first `number_of_samples` samples only (recommended).
125 | `--no_clean`: Do not perform additional text cleaning that may remove newlines, etc.
126 |
127 | **4. Launch Streamlit app**
128 |
129 | ```shell
130 | streamlit run summvis.py -- --path path/to/my_cache_file_or_parent_directory
131 | ```
132 |
133 | Note that the additional `--` is not a mistake, and is required to pass command-line arguments in Streamlit.
134 |
135 | ### If you do NOT have generated summaries:
136 |
137 | Before running the steps above, you may run the additional steps below to generate summaries. You may also refer to the [sample
138 | end-to-end loading scripts](examples/) for [WikiNews](examples/wikinews/load.sh) (loaded from .jsonl file) and [XSum](examples/xsum/load.sh)
139 | (loaded from HuggingFace Datasets).
140 |
141 | **1. Create file with the source documents and optional reference summaries in the following format:**
142 |
143 | ```
144 | {"document": "This is the first source document", "summary:reference": "This is the reference summary"}
145 | {"document": "This is the second source document", "summary:reference": "This is the reference summary"}
146 | ```
147 |
148 | You may create a .jsonl format directly from a Huggingface dataset by running `preprocessing.py` with the `--standardize` flag:
149 |
150 | ```shell
151 | python preprocessing.py \
152 | --standardize \
153 | --dataset hf_dataset_name \
154 | --version hf_dataset_version (optional) \
155 | --split hf_dataset_split \
156 | --save_jsonl_path path/to/save_jsonl_file
157 | ```
158 |
159 | **2. Generate predictions**
160 |
161 | To use one of the **6 standard models** (`bart-xsum`, `bart-cnndm`, `pegasus-xsum`, `pegasus-cnndm`, `pegasus-newsroom`,
162 | `pegasus-multinews`):
163 | ```shell
164 | python generation.py --model model_abbrev --data_path path/to/jsonl_file
165 | ```
166 | where `model` is one of the above 6 model codes.
167 |
168 | To use **any Huggingface model**:
169 | ```shell
170 | python generation.py --model_name_or_path model_name_or_path --data_path path/to/jsonl_file
171 | ```
172 | where `model_name_or_path` is the name of a Huggingface model or a local path.
173 |
174 | Either of the above two commands will generate a prediction file named `<data_file_name>.<model>.predictions`
175 |
176 | **3. Join one or more prediction files (from previous step) with original dataset**
177 |
178 | ```shell
179 | python join.py \
180 | --data_path path/to/jsonl_file \
181 | --generation_paths \
182 | path/to/prediction_file_1 \
183 | path/to/prediction_file_2 \
184 | --output_path path/to/save_jsonl_file
185 | ```
186 |
187 | Once you complete these steps, you may proceed with the [final steps](#if-you-have-generated-summaries) to load your file into SummVis.
188 |
189 | ## Deploying SummVis remotely
190 |
191 | See these tutorials on deploying a Streamlit app to various cloud services (from [Streamlit docs](https://docs.streamlit.io/en/stable/streamlit_faq.html)):
192 |
193 | * [How to Deploy Streamlit to a Free Amazon EC2 instance](https://towardsdatascience.com/how-to-deploy-a-streamlit-app-using-an-amazon-free-ec2-instance-416a41f69dc3), by Rahul Agarwal
194 | * [Host Streamlit on Heroku](https://towardsdatascience.com/quickly-build-and-deploy-an-application-with-streamlit-988ca08c7e83), by Maarten Grootendorst
195 | * [Host Streamlit on Azure](https://towardsdatascience.com/deploying-a-streamlit-web-app-with-azure-app-service-1f09a2159743), by Richard Peterson
196 | * [Host Streamlit on 21YunBox](https://www.21yunbox.com/docs/#/deploy-streamlit), by Toby Lei
197 |
198 | ## Citation
199 |
200 | When referencing this repository, please cite [this paper](https://arxiv.org/abs/2104.07605):
201 |
202 | ```
203 | @misc{vig2021summvis,
204 | title={SummVis: Interactive Visual Analysis of Models, Data, and Evaluation for Text Summarization},
205 | author={Jesse Vig and Wojciech Kry{\'s}ci{\'n}ski and Karan Goel and Nazneen Fatema Rajani},
206 | year={2021},
207 | eprint={2104.07605},
208 | archivePrefix={arXiv},
209 | primaryClass={cs.CL},
210 | url={https://arxiv.org/abs/2104.07605}
211 | }
212 | ```
213 |
214 | ## Acknowledgements
215 |
216 | We thank [Michael Correll](http://correll.io) for his valuable feedback.
217 |
218 |
219 |
--------------------------------------------------------------------------------
/align.py:
--------------------------------------------------------------------------------
1 | import heapq
2 | import itertools
3 | from abc import ABC, abstractmethod
4 | from collections import defaultdict
5 | from operator import itemgetter
6 | from typing import List, Dict, Tuple
7 | from typing import Sequence
8 | from abc import ABC
9 |
10 | import numpy as np
11 | import torch
12 | from bert_score import BERTScorer
13 | from nltk import PorterStemmer
14 | from spacy.tokens import Doc, Span
15 | from toolz import itertoolz
16 | from transformers import AutoTokenizer
17 | from transformers.tokenization_utils_base import PaddingStrategy
18 |
19 |
class EmbeddingModel(ABC):
    """Interface for models that produce per-token embedding arrays for spaCy sentences."""

    @abstractmethod
    def embed(
        self,
        sents: List[Span]
    ):
        """Return a list of per-token embedding arrays, one entry per sentence in `sents`."""
        pass
28 |
class ContextualEmbedding(EmbeddingModel):
    """Token embeddings from a HuggingFace transformer, re-aligned to spaCy tokenization.

    Runs the wrapped model over batches of sentences and mean-pools the subword
    embeddings mapping to each spaCy token, so every spaCy token receives exactly
    one L2-normalized vector.
    """

    def __init__(self, model, tokenizer_name, max_length, batch_size=32):
        # model: HuggingFace model already placed on its target device.
        # tokenizer_name: name forwarded to SpacyHuggingfaceTokenizer (e.g. "roberta-large").
        # max_length: maximum subword sequence length accepted by the tokenizer.
        # batch_size: number of sentences encoded per forward pass.
        self.model = model
        self.tokenizer = SpacyHuggingfaceTokenizer(tokenizer_name, max_length)
        self._device = model.device
        self.batch_size = batch_size

    def embed(
        self,
        sents: List[Span]
    ):
        """Embed each sentence; returns one (num_spacy_tokens, hidden_dim) array per sentence."""
        spacy_embs_list = []
        for start_idx in range(0, len(sents), self.batch_size):
            batch = sents[start_idx: start_idx + self.batch_size]
            encoded_input, special_tokens_masks, token_alignments = self.tokenizer.batch_encode(batch)
            encoded_input = {k: v.to(self._device) for k, v in encoded_input.items()}
            with torch.no_grad():  # inference only; no gradients needed
                model_output = self.model(**encoded_input)
            embeddings = model_output[0].cpu()  # first element: last hidden state
            for embs, mask, token_alignment \
                    in zip(embeddings, special_tokens_masks, token_alignments):
                mask = torch.tensor(mask)
                embs = embs[mask == 0]  # Filter embeddings at special token positions
                spacy_embs = []
                for hf_idxs in token_alignment:
                    if hf_idxs is None:
                        # spaCy token with no subword counterpart (presumably truncated
                        # or unmapped — confirm in SpacyHuggingfaceTokenizer): zero vector.
                        pooled_embs = torch.zeros_like(embs[0])
                    else:
                        pooled_embs = embs[hf_idxs].mean(dim=0)  # Pool embeddings that map to the same spacy token
                    spacy_embs.append(pooled_embs.numpy())
                spacy_embs = np.stack(spacy_embs)
                spacy_embs = spacy_embs / np.linalg.norm(spacy_embs, axis=-1, keepdims=True)  # Normalize
                spacy_embs_list.append(spacy_embs)
        # Sanity check: exactly one pooled embedding per spaCy token in every sentence.
        for embs, sent in zip(spacy_embs_list, sents):
            assert len(embs) == len(sent)
        return spacy_embs_list
66 |
67 |
class StaticEmbedding(EmbeddingModel):
    """Per-token embeddings taken directly from spaCy's static word vectors."""

    def embed(
        self,
        sents: List[Span]
    ):
        """Return one (num_tokens, vector_dim) array of unit-normalized vectors per sentence."""
        embedded = []
        for sent in sents:
            vectors = []
            for token in sent:
                # `or 1` avoids division by zero for tokens whose vector norm is 0.
                vectors.append(token.vector / (token.vector_norm or 1))
            embedded.append(np.stack(vectors))
        return embedded
78 |
79 |
class Aligner(ABC):
    """Interface for computing token-level alignments from target documents to a source."""

    @abstractmethod
    def align(
        self,
        source: Doc,
        targets: Sequence[Doc]
    ) -> List[Dict]:
        """Compute alignment from summary tokens to doc tokens
        Args:
            source: Source spaCy document
            targets: Target spaCy documents
        Returns: List of alignments, one for each target document
            (each alignment maps a target token index to its matched source tokens)"""
        pass
93 |
94 |
class EmbeddingAligner(Aligner):
    """Aligns target (summary) tokens to source tokens by embedding similarity.

    A token pair counts as aligned when its similarity, rescaled by
    `baseline_val`, exceeds `threshold`; at most `top_k` source matches are
    kept per target token.
    """

    def __init__(
        self,
        embedding: EmbeddingModel,
        threshold: float,
        top_k: int,
        baseline_val: float = 0
    ):
        # embedding: model producing per-token vectors for all sentences.
        # threshold: minimum rescaled similarity for a pair to count as aligned.
        # top_k: maximum number of source matches retained per target token.
        # baseline_val: similarity baseline subtracted before rescaling (see _emb_sim_sparse).
        self.threshold = threshold
        self.top_k = top_k
        self.embedding = embedding
        self.baseline_val = baseline_val

    def align(
        self,
        source: Doc,
        targets: Sequence[Doc]
    ) -> List[Dict]:
        """Compute alignment from summary tokens to doc tokens with greatest semantic similarity
        Args:
            source: Source spaCy document
            targets: Target spaCy documents
        Returns: List of alignments, one for each target document
        """
        if len(source) == 0:
            return [{} for _ in targets]
        # Embed all sentences (source + every target) in a single call, then split
        # the results back out per document using the per-document sentence counts.
        all_sents = list(source.sents) + list(itertools.chain.from_iterable(target.sents for target in targets))
        chunk_sizes = [_iter_len(source.sents)] + \
                      [_iter_len(target.sents) for target in targets]
        all_sents_token_embeddings = self.embedding.embed(all_sents)
        chunked_sents_token_embeddings = _split(all_sents_token_embeddings, chunk_sizes)
        source_sent_token_embeddings = chunked_sents_token_embeddings[0]
        source_token_embeddings = np.concatenate(source_sent_token_embeddings)
        # Zero out stopword/punctuation vectors so they can never clear the threshold.
        for token_idx, token in enumerate(source):
            if token.is_stop or token.is_punct:
                source_token_embeddings[token_idx] = 0
        alignments = []
        for i, target in enumerate(targets):
            target_sent_token_embeddings = chunked_sents_token_embeddings[i + 1]
            target_token_embeddings = np.concatenate(target_sent_token_embeddings)
            for token_idx, token in enumerate(target):
                if token.is_stop or token.is_punct:
                    target_token_embeddings[token_idx] = 0
            # alignment: target token index -> list of (source token index, score).
            alignment = defaultdict(list)
            for score, target_idx, source_idx in self._emb_sim_sparse(
                target_token_embeddings,
                source_token_embeddings,
            ):
                alignment[target_idx].append((source_idx, score))
            # TODO used argpartition to get nlargest
            for j in list(alignment):
                alignment[j] = heapq.nlargest(self.top_k, alignment[j], itemgetter(1))
            alignments.append(alignment)
        return alignments

    def _emb_sim_sparse(self, embs_1, embs_2):
        """Return (score, idx_1, idx_2) triples for all pairs above the threshold."""
        # Dot product of (pre-normalized) embeddings, then affine rescale so that
        # baseline_val maps to 0 while a similarity of 1 stays at 1.
        sim = embs_1 @ embs_2.T
        sim = (sim - self.baseline_val) / (1 - self.baseline_val)
        keep = sim > self.threshold
        keep_idxs_1, keep_idxs_2 = np.where(keep)
        keep_scores = sim[keep]
        return list(zip(keep_scores, keep_idxs_1, keep_idxs_2))
158 |
159 |
class BertscoreAligner(EmbeddingAligner):
    """EmbeddingAligner backed by BERTScore's rescaled roberta-large embeddings."""

    def __init__(
        self,
        threshold,
        top_k
    ):
        """
        Args:
            threshold: Minimum rescaled similarity for a token match.
            top_k: Maximum number of matches kept per target token.
        """
        scorer = BERTScorer(lang="en", rescale_with_baseline=True)
        # Reuse the model BERTScore already loaded rather than loading a
        # second copy. 510 is presumably 512 minus the two special tokens of
        # roberta-large -- TODO confirm.
        model = scorer._model
        embedding = ContextualEmbedding(model, "roberta-large", 510)
        # Index 2 appears to be the F1 baseline of the (P, R, F) triple used
        # by BERTScore's rescaling -- verify against the bert-score package.
        baseline_val = scorer.baseline_vals[2].item()

        # Modernized from the legacy two-argument super() form.
        super().__init__(
            embedding, threshold, top_k, baseline_val
        )
174 |
175 |
class StaticEmbeddingAligner(EmbeddingAligner):
    """EmbeddingAligner backed by static (context-independent) word embeddings."""

    def __init__(
        self,
        threshold,
        top_k
    ):
        """
        Args:
            threshold: Minimum rescaled similarity for a token match.
            top_k: Maximum number of matches kept per target token.
        """
        embedding = StaticEmbedding()
        # Modernized from the legacy two-argument super() form; baseline_val
        # keeps its default of 0 (no rescaling for static embeddings).
        super().__init__(
            embedding, threshold, top_k
        )
186 |
187 |
class NGramAligner(Aligner):
    """Aligner that matches n-grams with identical stemmed, lowercased content."""

    def __init__(self):
        self.stemmer = PorterStemmer()

    def align(
        self,
        source: Doc,
        targets: List[Doc],
    ) -> List[Dict]:
        """Compute n-gram alignments from each target document to the source.

        Args:
            source: Source spaCy document
            targets: Target spaCy documents

        Returns: List of alignments, one per target, each mapping a matched
            (start, end) target token span to its aligned source spans.
        """
        alignments = []
        source_ngram_spans = self._get_ngram_spans(source)
        for target in targets:
            target_ngram_spans = self._get_ngram_spans(target)
            alignments.append(
                self._align_ngrams(target_ngram_spans, source_ngram_spans)
            )
        return alignments

    def _get_ngram_spans(
        self,
        doc: Doc,
    ):
        """Map each normalized n-gram of *doc* to the list of (start, end)
        token spans where it occurs (content tokens only)."""
        ngrams = []
        for sent in doc.sents:
            # Hoisted out of the n-loop: the content-token filter does not
            # depend on n. Also use len(sent) directly instead of
            # materializing list(sent).
            tokens = [t for t in sent if not (t.is_stop or t.is_punct)]
            for n in range(1, len(sent)):
                ngrams.extend(_ngrams(tokens, n))

        def ngram_key(ngram):
            # Normalize by stemming + lowercasing so inflected forms match.
            return tuple(self.stemmer.stem(token.text).lower() for token in ngram)

        key_to_ngrams = itertoolz.groupby(ngram_key, ngrams)
        key_to_spans = {}
        for k, grouped_ngrams in key_to_ngrams.items():
            key_to_spans[k] = [
                (ngram[0].i, ngram[-1].i + 1)
                for ngram in grouped_ngrams
            ]
        return key_to_spans

    def _align_ngrams(
        self,
        ngram_spans_1: Dict[Tuple[str], List[Tuple[int, int]]],
        ngram_spans_2: Dict[Tuple[str], List[Tuple[int, int]]]
    ) -> Dict[Tuple[int, int], List[Tuple[int, int]]]:
        """Align ngram spans between two documents

        Args:
            ngram_spans_1: Map from (normalized_token1, normalized_token2, ...) n-gram tuple to a list of token spans
                of format (start_pos, end_pos)
            ngram_spans_2: Same format as above, but for second text

        Returns: map from each (start, end) span in text 1 to list of aligned (start, end) spans in text 2
        """
        if not ngram_spans_1 or not ngram_spans_2:
            return {}
        max_span_end_1 = max(span[1] for span in itertools.chain.from_iterable(ngram_spans_1.values()))
        # Each token of text 1 may belong to at most one matched span.
        token_is_available_1 = [True] * max_span_end_1
        # Normalized n-grams occurring in both texts.
        matched_keys = list(set(ngram_spans_1.keys()) & set(ngram_spans_2.keys()))
        matched_keys.sort(key=len, reverse=True)  # Process n-grams from longest to shortest

        # Map from each matched span in text 1 to list of aligned spans in text 2
        alignment = defaultdict(list)
        for key in matched_keys:
            spans_1 = ngram_spans_1[key]
            spans_2 = ngram_spans_2[key]
            available_spans_1 = [span for span in spans_1 if all(token_is_available_1[slice(*span)])]
            matched_spans_1 = []
            if available_spans_1 and spans_2:
                # if ngram can be matched to available spans in both sequences
                for span in available_spans_1:
                    # It's possible that these newly matched spans may be overlapping with one another, so
                    # check that token positions still available (only one span allowed per token in text 1):
                    if all(token_is_available_1[slice(*span)]):
                        matched_spans_1.append(span)
                        token_is_available_1[slice(*span)] = [False] * (span[1] - span[0])
            for span1 in matched_spans_1:
                alignment[span1] = spans_2

        return alignment
267 |
268 |
class SpacyHuggingfaceTokenizer:
    """Tokenizes spaCy sentences with a HuggingFace tokenizer while tracking,
    for each spaCy token, which HF sub-token indices it produced."""

    def __init__(
        self,
        model_name,
        max_length
    ):
        # use_fast=False loads the slow (pure-Python) tokenizer implementation.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        # Maximum number of HF sub-tokens kept per sentence (see tokenize()).
        self.max_length = max_length

    def batch_encode(
        self,
        sents: List[Span]
    ):
        """Encode a batch of spaCy sentence spans for the HF model.

        Args:
            sents: spaCy sentence spans to encode.

        Returns: tuple (encoded, special_tokens_masks, token_alignments) where
            encoded holds padded 'input_ids' / 'attention_mask' tensors,
            special_tokens_masks flags special-token positions per sentence,
            and token_alignments maps each spaCy token to its HF token indices
            (None for tokens dropped by truncation).
        """
        token_alignments = []
        token_ids_list = []

        # Tokenize each sentence and special tokens.
        for sent in sents:
            hf_tokens, token_alignment = self.tokenize(sent)
            token_alignments.append(token_alignment)
            token_ids = self.tokenizer.convert_tokens_to_ids(hf_tokens)
            # First pass: add special tokens (BOS/EOS) but no padding yet --
            # the batch maximum length is unknown until all sentences are done.
            encoding = self.tokenizer.prepare_for_model(
                token_ids,
                add_special_tokens=True,
                padding=False,
            )
            token_ids_list.append(encoding['input_ids'])

        # Add padding
        max_length = max(map(len, token_ids_list))
        attention_mask = []
        input_ids = []
        special_tokens_masks = []
        for token_ids in token_ids_list:
            # Second pass: pad to the batch maximum. Special tokens were
            # already added above, hence add_special_tokens=False here.
            encoding = self.tokenizer.prepare_for_model(
                token_ids,
                padding=PaddingStrategy.MAX_LENGTH,
                max_length=max_length,
                add_special_tokens=False
            )
            input_ids.append(encoding['input_ids'])
            attention_mask.append(encoding['attention_mask'])
            special_tokens_masks.append(
                self.tokenizer.get_special_tokens_mask(
                    encoding['input_ids'],
                    already_has_special_tokens=True
                )
            )

        encoded = {
            'input_ids': torch.tensor(input_ids),
            'attention_mask': torch.tensor(attention_mask)
        }
        return encoded, special_tokens_masks, token_alignments

    def tokenize(
        self,
        sent
    ):
        """Convert spacy sentence to huggingface tokens and compute the alignment"""
        hf_tokens = []
        token_alignment = []
        for i, token in enumerate(sent):
            # "Tokenize" each word individually, so as to track the alignment between spaCy/HF tokens
            # Prefix all tokens with a space except the first one in the sentence
            if i == 0:
                token_text = token.text
            else:
                token_text = ' ' + token.text
            start_hf_idx = len(hf_tokens)
            word_tokens = self.tokenizer.tokenize(token_text)
            end_hf_idx = len(hf_tokens) + len(word_tokens)
            # NOTE(review): `<` drops a word whose final sub-token would land
            # exactly at max_length; `<=` would still fit within max_length
            # tokens. Confirm whether the extra slot is reserved on purpose.
            if end_hf_idx < self.max_length:
                hf_tokens.extend(word_tokens)
                hf_idxs = list(range(start_hf_idx, end_hf_idx))
            else:
                # Truncated: this spaCy token has no HF counterpart.
                hf_idxs = None
            token_alignment.append(hf_idxs)
        return hf_tokens, token_alignment
348 |
349 |
350 | def _split(data, sizes):
351 | it = iter(data)
352 | return [[next(it) for _ in range(size)] for size in sizes]
353 |
354 |
355 | def _iter_len(it):
356 | return sum(1 for _ in it)
357 |
358 | # TODO set up batching
359 | # To get top K axis and value per row: https://stackoverflow.com/questions/42832711/using-np-argpartition-to-index-values-in-a-multidimensional-array
360 |
361 |
362 | def _ngrams(tokens, n):
363 | for i in range(len(tokens) - n + 1):
364 | yield tokens[i:i + n]
365 |
--------------------------------------------------------------------------------
/components.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from itertools import count
3 | from operator import itemgetter
4 | from pathlib import Path
5 | from typing import Dict, Optional
6 | from typing import List, Tuple, Union
7 |
8 | import htbuilder
9 | import streamlit as st
10 | from htbuilder import span, div, script, style, link, styles, HtmlElement, br
11 | from htbuilder.units import px
12 | from spacy.tokens import Doc
13 |
# Color palette cycled by get_color() so each summary sentence gets a distinct
# highlight color (looks like a ColorBrewer qualitative scheme -- TODO confirm).
palette = [
    "#66c2a5",
    "#fc8d62",
    "#8da0cb",
    "#e78ac3",
    "#a6d854",
    "#ffd92f",
    "#e5c494",
    "#b3b3b3",
]
# Gray presumably used for de-emphasized highlights; not referenced in this chunk.
inactive_color = "#BBB"
25 |
26 |
def local_stylesheet(path):
    """Return a <style> element containing the contents of a local CSS file.

    Args:
        path: Filesystem path of the CSS file.
    """
    # Explicit encoding: relying on the locale default can mis-read CSS
    # containing non-ASCII characters on some platforms.
    with open(path, encoding="utf-8") as f:
        css = f.read()
    return style()(
        css
    )
33 |
34 |
def remote_stylesheet(url):
    """Return a <link> element that loads a stylesheet from a remote URL.

    Args:
        url: URL of the stylesheet.
    """
    # Fixed: without rel="stylesheet" the browser does not treat the <link>
    # as a stylesheet at all.
    return link(
        rel="stylesheet",
        href=url
    )
39 |
40 |
def local_script(path):
    """Return a <script> element containing the contents of a local JS file.

    Args:
        path: Filesystem path of the JavaScript file.
    """
    # Explicit encoding: relying on the locale default can mis-read scripts
    # containing non-ASCII characters on some platforms.
    with open(path, encoding="utf-8") as f:
        code = f.read()
    return script()(
        code
    )
47 |
48 |
def remote_script(url):
    """Build a <script> tag that sources JavaScript from a remote URL."""
    return script(src=url)
53 |
54 |
def get_color(sent_idx):
    """Pick a highlight color for a sentence index, wrapping around the palette."""
    wrapped_idx = sent_idx % len(palette)
    return palette[wrapped_idx]
57 |
58 |
def hex_to_rgb(hex):
    """Convert a hex color string like "#66c2a5" into an (r, g, b) integer tuple."""
    digits = hex.replace("#", '')
    return tuple(int(digits[pos:pos + 2], 16) for pos in (0, 2, 4))
62 |
63 |
def color_with_opacity(hex_color, opacity):
    """Render a hex color as a CSS rgba() string with the given opacity."""
    r, g, b = hex_to_rgb(hex_color)
    return f"rgba({r},{g},{b},{opacity:.2f})"
67 |
68 |
class Component:
    """Base class for HTML components rendered through Streamlit."""

    def show(self, width=None, height=None, scrolling=True, **kwargs):
        """Render the component inside a Streamlit HTML frame.

        Args:
            width: Frame width in pixels, or None for the default.
            height: Frame height in pixels, or None for the default.
            scrolling: Whether the frame allows scrolling.
            **kwargs: CSS properties applied to a wrapping <div>.
        """
        out = div(style=styles(
            **kwargs
        ))(self.html())
        html = str(out)
        st.components.v1.html(html, width=width, height=height, scrolling=scrolling)

    def html(self):
        """Return the component's HTML element tree; subclasses must override.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        # Fixed: `raise NotImplemented` raised a TypeError (NotImplemented is
        # a constant, not an exception); NotImplementedError is the correct
        # way to mark an abstract method.
        raise NotImplementedError
80 |
81 |
82 | class MainView(Component):
83 |
    def __init__(
        self,
        document: Doc,
        summaries: List[Doc],
        semantic_alignments: Optional[List[Dict]],
        lexical_alignments: Optional[List[Dict]],
        layout: str,
        scroll: bool,
        gray_out_stopwords: bool
    ):
        """Store the data needed to render the document/summaries view.

        Args:
            document: Source spaCy document shown in the document pane.
            summaries: Candidate summaries as spaCy documents, one pane entry each.
            semantic_alignments: Per-summary maps of token-level semantic
                matches, or None to disable semantic highlighting.
            lexical_alignments: Per-summary maps of n-gram span matches, or
                None to disable lexical underlining.
            layout: Layout mode name (its interpretation is not visible in
                this part of the file -- see callers).
            scroll: Whether the document/summary panes get scrollable styling.
            gray_out_stopwords: Whether stopword tokens are rendered grayed out.
        """
        self.document = document
        self.summaries = summaries
        self.semantic_alignments = semantic_alignments
        self.lexical_alignments = lexical_alignments
        self.layout = layout
        self.scroll = scroll
        self.gray_out_stopwords = gray_out_stopwords
101 |
102 | def html(self):
103 |
104 | # Add document elements
105 | if self.document._.name == 'Document':
106 | document_name = 'Source Document'
107 | else:
108 | document_name = self.document._.name + ' summary'
109 | doc_header = div(
110 | id_="document-header"
111 | )(
112 | document_name
113 | )
114 | doc_elements = []
115 |
116 | # Add document content, which comprises multiple elements, one for each summary. Only the elment corresponding to
117 | # selected summary will be visible.
118 |
119 | mu = MultiUnderline()
120 |
121 | for summary_idx, summary in enumerate(self.summaries):
122 | token_idx_to_sent_idx = {}
123 | for sent_idx, sent in enumerate(summary.sents):
124 | for token in sent:
125 | token_idx_to_sent_idx[token.i] = sent_idx
126 | is_selected_summary = (summary_idx == 0) # By default, first summary is selected
127 |
128 | if self.semantic_alignments is not None:
129 | doc_token_idx_to_matches = defaultdict(list)
130 | semantic_alignment = self.semantic_alignments[summary_idx]
131 | for summary_token_idx, matches in semantic_alignment.items():
132 | for doc_token_idx, sim in matches:
133 | doc_token_idx_to_matches[doc_token_idx].append((summary_token_idx, sim))
134 | else:
135 | doc_token_idx_to_matches = {}
136 |
137 | token_elements = []
138 | for doc_token_idx, doc_token in enumerate(self.document):
139 | if doc_token.is_stop or doc_token.is_punct:
140 | classes = ["stopword"]
141 | if self.gray_out_stopwords:
142 | classes.append("grayed-out")
143 | el = span(
144 | _class=" ".join(classes)
145 | )(
146 | doc_token.text
147 | )
148 |
149 | else:
150 | matches = doc_token_idx_to_matches.get(doc_token_idx)
151 | if matches:
152 | summary_token_idx, sim = max(matches, key=itemgetter(1))
153 | sent_idx = token_idx_to_sent_idx[summary_token_idx]
154 | color_primary = get_color(sent_idx)
155 | highlight_color_primary = color_with_opacity(color_primary, sim)
156 | props = {
157 | 'data-highlight-id': str(doc_token_idx),
158 | 'data-primary-color': highlight_color_primary
159 | }
160 | match_classes = []
161 | for summary_token_idx, sim in matches:
162 | sent_idx = token_idx_to_sent_idx[summary_token_idx]
163 | match_classes.append(f"summary-highlight-{summary_idx}-{summary_token_idx}")
164 | color = color_with_opacity(get_color(sent_idx), sim)
165 | props[f"data-color-{summary_idx}-{summary_token_idx}"] = color
166 | props["data-match-classes"] = " ".join(match_classes)
167 | el = self._highlight(
168 | doc_token.text,
169 | highlight_color_primary,
170 | color_primary,
171 | match_classes + ["annotation-hidden"],
172 | **props
173 | )
174 | else:
175 | el = doc_token.text
176 | token_elements.append(el)
177 |
178 | spans = []
179 | if self.lexical_alignments is not None:
180 | lexical_alignment = self.lexical_alignments[summary_idx]
181 | for summary_span, doc_spans in lexical_alignment.items():
182 | summary_span_start, summary_span_end = summary_span
183 | span_id = f"{summary_idx}-{summary_span_start}-{summary_span_end}"
184 | sent_idx = token_idx_to_sent_idx[summary_span_start]
185 | for doc_span_start, doc_span_end in doc_spans:
186 | spans.append((
187 | doc_span_start,
188 | doc_span_end,
189 | sent_idx,
190 | get_color(sent_idx),
191 | span_id
192 | ))
193 | token_elements = mu.markup(token_elements, spans)
194 |
195 | classes = ["main-doc", "bordered"]
196 | if self.scroll:
197 | classes.append("scroll")
198 |
199 | main_doc = div(
200 | _class=" ".join(classes)
201 | )(
202 | token_elements
203 | ),
204 |
205 | classes = ["doc"]
206 | if is_selected_summary:
207 | classes.append("display")
208 | else:
209 | classes.append("nodisplay")
210 | doc_elements.append(
211 | div(
212 | **{
213 | "class": " ".join(classes),
214 | "data-index": summary_idx
215 | }
216 | )(
217 | main_doc,
218 | div(_class="proxy-doc"),
219 | div(_class="proxy-scroll")
220 | )
221 | )
222 |
223 | summary_title = "Summary"
224 | summary_header = div(
225 | id_="summary-header"
226 | )(
227 | summary_title,
228 | div(id="summary-header-gap"),
229 | )
230 |
231 | summary_items = []
232 | for summary_idx, summary in enumerate(self.summaries):
233 | token_idx_to_sent_idx = {}
234 | for sent_idx, sent in enumerate(summary.sents):
235 | for token in sent:
236 | token_idx_to_sent_idx[token.i] = sent_idx
237 |
238 | spans = []
239 | matches_ngram = [False] * len(list(summary))
240 | if self.lexical_alignments is not None:
241 | lexical_alignment = self.lexical_alignments[summary_idx]
242 | for summary_span in lexical_alignment.keys():
243 | start, end = summary_span
244 | matches_ngram[slice(start, end)] = [True] * (end - start)
245 | span_id = f"{summary_idx}-{start}-{end}"
246 | sent_idx = token_idx_to_sent_idx[start]
247 | spans.append((
248 | start,
249 | end,
250 | sent_idx,
251 | get_color(sent_idx),
252 | span_id
253 | ))
254 |
255 | if self.semantic_alignments is not None:
256 | semantic_alignment = self.semantic_alignments[summary_idx]
257 | else:
258 | semantic_alignment = {}
259 | token_elements = []
260 | for token_idx, token in enumerate(summary):
261 | if token.is_stop or token.is_punct:
262 | classes = ["stopword"]
263 | if self.gray_out_stopwords:
264 | classes.append("grayed-out")
265 | el = span(
266 | _class=" ".join(classes)
267 | )(
268 | token.text
269 | )
270 | else:
271 | classes = []
272 | if token.ent_iob_ in ('I', 'B'):
273 | classes.append("entity")
274 | if matches_ngram[token_idx]:
275 | classes.append("matches-ngram")
276 | matches = semantic_alignment.get(token_idx)
277 | if matches:
278 | top_match = max(matches, key=itemgetter(1))
279 | top_sim = max(top_match[1], 0)
280 | top_doc_token_idx = top_match[0]
281 | props = {
282 | "data-highlight-id": f"{summary_idx}-{token_idx}",
283 | "data-top-doc-highlight-id": str(top_doc_token_idx),
284 | "data-top-doc-sim": f"{top_sim:.2f}",
285 | }
286 | classes.extend([
287 | "annotation-hidden",
288 | f"summary-highlight-{summary_idx}-{token_idx}"
289 | ])
290 | sent_idx = token_idx_to_sent_idx[token_idx]
291 | el = self._highlight(
292 | token.text,
293 | color_with_opacity(get_color(sent_idx), top_sim),
294 | color_with_opacity(get_color(sent_idx), 1),
295 | classes,
296 | **props
297 | )
298 | else:
299 | if classes:
300 | el = span(_class=" ".join(classes))(token.text)
301 | else:
302 | el = token.text
303 | token_elements.append(el)
304 |
305 | token_elements = mu.markup(token_elements, spans)
306 |
307 | classes = ["summary-item"]
308 | if summary_idx == 0: # Default is for first summary to be selected
309 | classes.append("selected")
310 |
311 | summary_items.append(
312 | div(
313 | **{"class": ' '.join(classes), "data-index": summary_idx}
314 | )(
315 | div(_class="name")(summary._.name),
316 | div(_class="content")(token_elements)
317 | )
318 | )
319 | classes = ["summary-list", "bordered"]
320 | if self.scroll:
321 | classes.append("scroll")
322 | if self.lexical_alignments is not None:
323 | classes.append("has-lexical-alignment")
324 | if self.semantic_alignments is not None:
325 | classes.append("has-semantic-alignment")
326 | summary_list = div(
327 | _class=" ".join(classes)
328 | )(
329 | summary_items
330 | )
331 |
332 | annotation_key = \
333 | """
334 |