├── experiments
│   ├── joint
│   │   ├── model.py
│   │   └── main.py
│   ├── pipeline
│   │   └── main.py
│   └── prepare_data.py
├── README.md
└── LICENSE-CC-BY-NC-ND

/experiments/prepare_data.py:
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.model_selection import StratifiedKFold

# A single dataset instance: a health claim, its verdict label, the gold
# explanation, the article sentences, and the indices of the evidence sentences.
class Entry:
    def __init__(self, cid, claim, explanation, label, sentences, rationales, order):
        self.cid = cid
        self.claim = claim
        self.explanation = explanation
        self.label = label
        self.sentences = sentences
        self.rationales = rationales
        self.order = order

df = pd.read_csv("healthFC_annotated.csv")
entries = list()

for idx, row in df.iterrows():
    claim = row['en_claim']
    explanation = row['en_explanation']
    label = row['label']
    sentences = row['en_sentences']
    ids = row['en_ids']

    e = Entry(idx, claim, explanation, label, sentences, ids, ids)
    entries.append(e)


X = list()
y = list()
for idx in range(len(entries)):
    X.append(entries[idx])
    y.append(int(entries[idx].label))

X = np.array(X)
y = np.array(y)

# Stratified 5-fold split over the verdict labels.
skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

entries = np.array(entries)
folds = list()

explanations = np.array([e.explanation for e in entries])
expl_folds = list()

for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(f"Fold {i}:")
    print(f"  Train: index={train_index}")
    print(f"  Test:  index={test_index}")

    fold_train = entries[train_index]
    fold_test = entries[test_index]
    folds.append((fold_train, fold_test))

    expl_fold_train = explanations[train_index]
    expl_fold_test = explanations[test_index]
    expl_folds.append((expl_fold_train, expl_fold_test))
    #google_fold_train = full_evidence[train_index]
    #google_fold_test = full_evidence[test_index]
    #google_folds.append((google_fold_train, google_fold_test))

# The sentence list and the evidence-id list are stored as string literals in
# the CSV; parse them back into Python lists.
for idx in range(len(entries)):
    entries[idx].sentences = literal_eval(entries[idx].sentences)
    entries[idx].order = [int(i) for i in literal_eval(entries[idx].rationales)]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Code of the research paper "[HealthFC: Verifying Health Claims with Evidence-Based Medical Fact-Checking](https://aclanthology.org/2024.lrec-main.709/)", accepted to LREC-COLING 2024, can be found in the _experiments_ folder.

The dataset _Medizin-transparent_ used for the experiments in the paper can be found in _Datensatz.csv_.
The dataset is free to use for research purposes under the license and terms described below.

Feel free to reach out with any questions or comments.


## Dataset Elements

- **en/de_claim**: Health claim posed in the form of a research question
- **en/de_top_sentences**: Up to five sentences from the article text that are most important for determining claim veracity
- **en/de_explanation**: Explanation of the final claim verdict in the form of a short summary paragraph

- **verdict**: Original verdict on the claim from the medical team
- **label**: Verdict mapped to one of three labels: Supported (0), Not enough information (1), Refuted (2)

- **title**: Original title of the article
- **date**: Date of article creation or of the latest update
- **author**: Authors of the article
- **url**: URL of the full article text
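For quick orientation, here is a minimal loading sketch (it assumes only `pandas`; the column names are the ones listed above):

```
import pandas as pd

df = pd.read_csv("Datensatz.csv")

label_names = {0: "Supported", 1: "Not enough information", 2: "Refuted"}
first = df.iloc[0]
print(first["en_claim"])                 # English claim of the first entry
print(label_names[int(first["label"])])  # its mapped verdict label
```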
## Dataset License
(EN) The dataset _Medizin-transparent_ is licensed under a
[Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License][cc-by-nc-nd].

(DE) Der Datensatz _Medizin-transparent_ unterliegt den Bestimmungen einer
[Creative Commons Namensnennung-Nicht kommerziell-Keine Bearbeitungen 4.0 International-Lizenz](https://creativecommons.org/licenses/by-nc-nd/4.0/deed.de).

[![CC BY-NC-ND 4.0][cc-by-nc-nd-image]][cc-by-nc-nd] [![CC BY-NC-ND 4.0][cc-by-nc-nd-shield]][cc-by-nc-nd]

[cc-by-nc-nd-DE]: https://creativecommons.org/licenses/by-nc-nd/4.0/deed.de
[cc-by-nc-nd]: https://creativecommons.org/licenses/by-nc-nd/4.0/
[cc-by-nc-nd-image]: https://licensebuttons.net/l/by-nc-nd/4.0/88x31.png
[cc-by-nc-nd-shield]: https://img.shields.io/badge/License-CC%20BY--NC--ND%204.0-lightgrey.svg

## Dataset Attribution

(EN) The article texts used for construction of the dataset were written by:

- University for Continuing Education Krems (Danube University Krems)
- Medizin-transparent article authors included in the dataset: Bernd Kerschner, Jana Meixner, Teresa König, Iris Hinneburg, Julia Harlfinger, Claudia Christof, Jörg Wipplinger, Iris Mair, Verena Ahne, Tanja Wolf, Björn Bernitt, Lilith Teusch

Please ensure proper attribution when using this dataset by including the above information.

-----

(DE) Die Artikeltexte, die für die Erstellung des Datensatzes verwendet wurden, stammen von:

- Universität für Weiterbildung Krems (Donau-Universität Krems)
- Medizin-transparent-Artikel-Autor*innen, die im Datensatz vorkommen: Bernd Kerschner, Jana Meixner, Teresa König, Iris Hinneburg, Julia Harlfinger, Claudia Christof, Jörg Wipplinger, Iris Mair, Verena Ahne, Tanja Wolf, Björn Bernitt, Lilith Teusch

Bitte achten Sie bei der Verwendung dieses Datensatzes auf die korrekte Zuordnung der Daten und geben Sie die oben genannten Informationen an.

Medizin transparent wurde u.a. finanziert durch den Niederösterreichischen Gesundheits- und Sozialfonds (NÖGUS) sowie die Bundesgesundheitsagentur (BGA) in Österreich.
Informationen zu diesen und weiteren Fördergebern unter https://medizin-transparent.at

(EN: Medizin transparent was funded, among others, by the Lower Austrian Health and Social Fund (NÖGUS) and the Federal Health Agency (BGA) in Austria. Information on these and further funders is available at https://medizin-transparent.at.)

## Study Citation

To cite the HealthFC study, please use the following BibTeX entry:
```
@inproceedings{vladika-etal-2024-healthfc-verifying,
    title = "{H}ealth{FC}: Verifying Health Claims with Evidence-Based Medical Fact-Checking",
    author = "Vladika, Juraj and
      Schneider, Phillip and
      Matthes, Florian",
    editor = "Calzolari, Nicoletta and
      Kan, Min-Yen and
      Hoste, Veronique and
      Lenci, Alessandro and
      Sakti, Sakriani and
      Xue, Nianwen",
    booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
    month = may,
    year = "2024",
    address = "Torino, Italy",
    publisher = "ELRA and ICCL",
    url = "https://aclanthology.org/2024.lrec-main.709",
    pages = "8095--8107",
}
```
--------------------------------------------------------------------------------
/experiments/pipeline/main.py:
--------------------------------------------------------------------------------
import gc
import numpy as np
import torch
from torch.utils.data import DataLoader
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Note: `folds`, `unsorted_folds`, and `selected_evidence` are expected to be
# in scope already; the folds are produced by experiments/prepare_data.py.

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average="macro")
    acc = accuracy_score(labels, preds)
    prec = precision_score(labels, preds, average="macro")
    recall = recall_score(labels, preds, average="macro")
    return {"accuracy": acc, "precision": prec, "recall": recall, "f1": f1}


class CtDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


for fold_idx in range(len(folds)):
    print("Fold ", fold_idx)

    fold = folds[fold_idx]
    fold_train = fold[0]
    fold_test = fold[1]

    m = "microsoft/deberta-v3-large"

    model = None
    tokenizer = None

    torch.cuda.empty_cache()
    gc.collect()

    ## Test with gold evidence

    # Build one input string per claim: the claim, followed by its gold
    # evidence sentences in rationale order, joined with a [SEP] marker.
    joint_train = list()
    labels_train = list()
    for cid in range(len(fold_train)):
        entry = fold_train[cid]
        claim = entry.claim
        evs_p = entry.sentences
        ids = entry.order
        string = claim + " [SEP] "

        for sid in ids:
            candidate_sentence = evs_p[sid]
            string += candidate_sentence + " "

        joint_train.append(string)
        labels_train.append(entry.label)

    joint_dev = list()
    labels_dev = list()
    for cid in range(len(fold_test)):
        entry = fold_test[cid]
        claim = entry.claim
        evs_p = entry.sentences
        ids = entry.order
        string = claim + " [SEP] "

        for sid in ids:
            candidate_sentence = evs_p[sid]
            string += candidate_sentence + " "

        joint_dev.append(string)
        labels_dev.append(entry.label)
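    # Illustration (hypothetical texts, not taken from the dataset): a single
    # classifier input built above looks like
    #   "Does ginger tea help against colds? [SEP] Evidence sentence one. Evidence sentence two."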
    tokenizer = AutoTokenizer.from_pretrained(m, model_max_length=256)
    model = AutoModelForSequenceClassification.from_pretrained(m, num_labels=3, ignore_mismatched_sizes=True)

    trains = tokenizer(joint_train, return_tensors='pt',
                       truncation=True, add_special_tokens=True, padding=True)
    tests = tokenizer(joint_dev, return_tensors='pt',
                      truncation=True, add_special_tokens=True, padding=True)

    # Convert data into datasets.
    train_dataset = CtDataset(trains, labels_train)
    test_dataset = CtDataset(tests, labels_dev)

    batch_size = 4
    logging_steps = len(fold_train) // batch_size

    model_name = "finetuned-model"

    training_args = TrainingArguments(output_dir=model_name,
                                      dataloader_pin_memory=True, dataloader_num_workers=4,
                                      fp16=True,
                                      warmup_ratio=0.06,
                                      gradient_accumulation_steps=4,
                                      num_train_epochs=7,
                                      learning_rate=1e-5,
                                      per_device_train_batch_size=batch_size,
                                      per_device_eval_batch_size=batch_size,
                                      weight_decay=0.01,
                                      evaluation_strategy="epoch",
                                      save_strategy="no",
                                      disable_tqdm=False,
                                      logging_steps=logging_steps,
                                      push_to_hub=False)

    trainer = Trainer(model=model, args=training_args,
                      compute_metrics=compute_metrics,
                      train_dataset=train_dataset,
                      eval_dataset=test_dataset,
                      tokenizer=tokenizer)

    print(m)
    trainer.train()


    ## Test with selected evidence

    # `selected_evidence` holds, per fold, the global indices of sentences
    # predicted as evidence by the sentence-selection stage of the pipeline
    # (produced outside this script).
    fold_evidence = selected_evidence[fold_idx]

    # Re-create the per-sentence (sentence [SEP] claim) pairs in the same
    # order in which the evidence-selection model scored them.
    unsorted_fold_test = unsorted_folds[fold_idx][1]
    joint_test = list()
    for cid in range(len(unsorted_fold_test)):
        entry = unsorted_fold_test[cid]
        claim = entry.claim
        evs_p = entry.sentences

        for sid in range(len(evs_p)):
            candidate_sentence = evs_p[sid]
            joint = candidate_sentence + " [SEP] " + claim
            joint_test.append(joint)

    # For each claim, walk through its (sentence [SEP] claim) pairs and keep
    # the sentences whose global index was selected as evidence; the matching
    # relies on `claim in j` and on the pairs being grouped by claim.
    nli_test = list()
    idx = 0
    for cid in range(len(fold_test)):
        entry = fold_test[cid]
        claim = entry.claim
        string = claim + " [SEP] "

        for i in range(idx, len(joint_test)):
            j = joint_test[i]
            if claim in j:
                if i in fold_evidence:
                    second = j.split(" [SEP] ")[0]
                    string += second
                    string += " "
            else:
                idx = i
                break
        nli_test.append(string)
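    # Inference with the fine-tuned classifier: the claim + selected-evidence
    # strings are tokenized and classified batch by batch below; verdict
    # probabilities come from a softmax over the three logits.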
    nli_encoded = tokenizer(nli_test, return_tensors='pt',
                            truncation=True, add_special_tokens=True, padding=True)

    nli_dataset = CtDataset(nli_encoded, np.zeros(len(nli_test)))

    test_loader = DataLoader(nli_dataset, batch_size=8,
                             drop_last=False, shuffle=False, num_workers=4)

    model.eval()
    model = model.to("cuda")

    result = np.zeros(len(test_loader.dataset))
    index = 0

    with torch.no_grad():
        for batch_num, instances in enumerate(test_loader):
            print(batch_num)
            input_ids = instances["input_ids"].to("cuda")
            attention_mask = instances["attention_mask"].to("cuda")
            logits = model(input_ids=input_ids,
                           attention_mask=attention_mask)[0]
            probs = logits.softmax(dim=1)

            pred = probs.argmax(-1).to("cpu")

            result[index : index + pred.shape[0]] = pred.flatten().to("cpu")
            index += pred.shape[0]

    y_pred = result
    y_true = list()
    for e in fold_test:
        y_true.append(e.label)
    y_true = np.array(y_true)

    f1 = f1_score(y_true, y_pred, average="macro")
    precision = precision_score(y_true, y_pred, average="macro")
    recall = recall_score(y_true, y_pred, average="macro")
    print("Selected evidence results")
    print("F1 ", f1, " P ", precision, " R ", recall)
--------------------------------------------------------------------------------
/experiments/joint/model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F

class ClassificationHead(nn.Module):
    """Head for sentence-level classification tasks."""

    def __init__(self, hidden_dim, n_labels, hidden_dropout_prob=0.1):
        super().__init__()
        self.dense = nn.Linear(hidden_dim, hidden_dim)
        self.dropout = nn.Dropout(hidden_dropout_prob)
        self.out_proj = nn.Linear(hidden_dim, n_labels)

    def forward(self, x, **kwargs):
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x

# Applies a linear weighting / self-attention layer over the tokens of each
# sentence to build sentence representations.
class WordAttention(nn.Module):
    """
    x: (BATCH_SIZE, N_sentence, N_token, INPUT_SIZE)
    token_mask: (BATCH_SIZE, N_sentence, N_token)
    out: (BATCH_SIZE, N_sentence, INPUT_SIZE)
    mask: (BATCH_SIZE, N_sentence)
    """
    def __init__(self, INPUT_SIZE, PROJ_SIZE, dropout=0.0):
        super(WordAttention, self).__init__()

        self.activation = torch.tanh
        self.att_proj = nn.Linear(INPUT_SIZE, PROJ_SIZE)
        self.dropout = nn.Dropout(dropout)
        self.att_scorer = nn.Linear(PROJ_SIZE, 1)

    def forward(self, x, token_mask):
        proj_input = self.att_proj(self.dropout(x.view(-1, x.size(-1))))
        proj_input = self.dropout(self.activation(proj_input))
        raw_att_scores = self.att_scorer(proj_input).squeeze(-1).view(x.size(0), x.size(1), x.size(2))  # (BATCH_SIZE, N_sentence, N_token)
        att_scores = F.softmax(raw_att_scores.masked_fill((1 - token_mask).bool(), float('-inf')), dim=-1)
        att_scores = torch.where(torch.isnan(att_scores), torch.zeros_like(att_scores), att_scores)  # replace NaN (fully masked sentences) with 0
        batch_att_scores = att_scores.view(-1, att_scores.size(-1))  # (BATCH_SIZE * N_sentence, N_token)
        out = torch.bmm(batch_att_scores.unsqueeze(1), x.view(-1, x.size(2), x.size(3))).squeeze(1)
        # (BATCH_SIZE * N_sentence, INPUT_SIZE)
        out = out.view(x.size(0), x.size(1), x.size(-1))
        mask = token_mask[:, :, 0]
        return out, mask
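# Quick shape sanity check for WordAttention (hypothetical sizes; run manually
# if desired):
#   att = WordAttention(INPUT_SIZE=1024, PROJ_SIZE=1024)
#   x = torch.randn(2, 3, 5, 1024)     # 2 instances, 3 sentences, 5 tokens each
#   token_mask = torch.ones(2, 3, 5)
#   out, mask = att(x, token_mask)     # out: (2, 3, 1024), mask: (2, 3)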
class DynamicSentenceAttention(nn.Module):
    """
    input: (BATCH_SIZE, N_sentence, INPUT_SIZE)
    output: (BATCH_SIZE, INPUT_SIZE)
    """
    def __init__(self, INPUT_SIZE, PROJ_SIZE, REC_HID_SIZE=None, dropout=0.1):
        super(DynamicSentenceAttention, self).__init__()
        self.activation = torch.tanh
        self.att_proj = nn.Linear(INPUT_SIZE, PROJ_SIZE)
        self.dropout = nn.Dropout(dropout)

        if REC_HID_SIZE is not None:
            self.contextualized = True
            self.lstm = nn.LSTM(PROJ_SIZE, REC_HID_SIZE, bidirectional=False, batch_first=True)
            self.att_scorer = nn.Linear(REC_HID_SIZE, 2)
        else:
            self.contextualized = False
            self.att_scorer = nn.Linear(PROJ_SIZE, 2)

    def forward(self, sentence_reps, sentence_mask, att_scores, valid_scores):
        # sentence_reps: (BATCH_SIZE, N_sentence, INPUT_SIZE)
        # sentence_mask: (BATCH_SIZE, N_sentence)
        # att_scores: (BATCH_SIZE, N_sentence)
        # valid_scores: (BATCH_SIZE, N_sentence)
        # result: (BATCH_SIZE, INPUT_SIZE)
        # Only sentences predicted as evidence stay unmasked.
        sentence_mask = torch.logical_and(sentence_mask, valid_scores)

        if sentence_reps.size(0) > 0:
            att_scores = F.softmax(att_scores.masked_fill((~sentence_mask).bool(), -1e4), dim=-1)
            result = torch.bmm(att_scores.unsqueeze(1), sentence_reps).squeeze(1)
            return result
        else:
            return sentence_reps[:, 0, :]


# The final joint model used for the tasks.
class ModelForSequenceClassification(nn.Module):
    def __init__(self, base_model, hidden_dim=1024, n_labels=2):
        super().__init__()

        # DeBERTa-v3-large hidden size is 1024.
        # We use DeBERTa as the base model for encoding the data instances.
        self.deberta = base_model

        self.word_attention = WordAttention(hidden_dim, hidden_dim, dropout=0.0)
        self.evidence_linear = ClassificationHead(hidden_dim=hidden_dim,
                                                  n_labels=n_labels, hidden_dropout_prob=0.0)

        self.evidence_criterion = nn.CrossEntropyLoss(ignore_index=2)
        self.nli_criterion = nn.CrossEntropyLoss()

        self.sentence_attention = DynamicSentenceAttention(hidden_dim, hidden_dim, dropout=0.0)
        self.nli_linear = ClassificationHead(hidden_dim, 3, hidden_dropout_prob=0.0)

        self.extra_modules = [
            self.sentence_attention,
            self.nli_linear,
            self.evidence_linear,
            self.nli_criterion,
            self.evidence_criterion,
            self.word_attention
        ]

    def select_valid(self, token_reps, token_mask, valid_sentences):
        # token_reps: (BATCH_SIZE, N_sentence, N_token, INPUT_SIZE)
        # token_mask: (BATCH_SIZE, N_sentence, N_token)
        # valid_sentences: (BATCH_SIZE, N_sentence)
        if valid_sentences.size(1) > token_reps[:, 1:, :, :].size(1):
            valid_sentences = valid_sentences[:, :token_reps[:, 1:, :, :].size(1)]

        evidence_reps = token_reps[:, 1:, :, :][valid_sentences]
        evidence_token_mask = token_mask[:, 1:, :][valid_sentences]
        evidence_reps = evidence_reps.view(1, evidence_reps.size(0), evidence_reps.size(1), evidence_reps.size(2))
        evidence_token_mask = evidence_token_mask.view(1, evidence_token_mask.size(0), evidence_token_mask.size(1))
        if len(evidence_reps.shape) == 3 or evidence_reps.size(1) == 0:
            evidence_reps = token_reps[:, 1, :, :].unsqueeze(1)  # first sentence is the claim; second is a dummy
            evidence_token_mask = token_mask[:, 1, :].unsqueeze(1)
        return evidence_reps, evidence_token_mask
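    # The model is trained on two tasks at once: `evidence_linear` scores every
    # sentence as evidence vs. non-evidence (label 2 marks padding and is
    # ignored by the loss), while `nli_linear` predicts the three-way verdict
    # from the attention-pooled paragraph representation.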
    def forward(
        self,
        encoded,
        attention_mask,
        nli_label,
        evidence_label,
        transformation_indices,
        sample_p=1,
        **kwargs
    ):
        batch_indices, indices_by_batch, mask = transformation_indices  # (BATCH_SIZE, N_sep, N_token)

        deberta_out = self.deberta(encoded, attention_mask)[0]  # (BATCH_SIZE, sequence_len, BERT_DIM)
        deberta_tokens = deberta_out[batch_indices, indices_by_batch, :]  # (BATCH_SIZE, N_sep, N_token, BERT_DIM)

        # Represent sentences as weighted self-attention reps.
        sentence_reps, sentence_mask = self.word_attention(deberta_tokens, mask)

        # Logits of the linear evidence predictor.
        evidence_out = self.evidence_linear(sentence_reps)

        att_scores = evidence_out[:, :, 1]  # (BATCH_SIZE, N_sentence)

        # Choose sentences according to the predicted evidence with
        # probability sample_p, otherwise fall back to the ground truth.
        if bool(torch.rand(1) < sample_p):
            valid_scores = evidence_out[:, :, 1] > evidence_out[:, :, 0]
        else:
            valid_scores = evidence_label == 1  # ground truth
            valid_scores = valid_scores[:, :mask.size(1)]

        paragraph_rep = self.sentence_attention(sentence_reps, sentence_mask, att_scores, valid_scores)
        # (BATCH_SIZE, BERT_DIM)

        nli_out = self.nli_linear(paragraph_rep)  # (BATCH_SIZE, 3)

        # Loss calculation.
        if evidence_label.size(1) > evidence_out.size(1):
            evidence_label = evidence_label[:, :evidence_out.size(1)]
        evidence_loss = self.evidence_criterion(evidence_out.view(-1, 2),
                                                evidence_label.reshape(-1))  # ignores index 2 (padding)

        # Indices of sentences predicted as evidence.
        evidence_preds = (torch.softmax(evidence_out, dim=-1)[:, :, 1] > 0.5).nonzero().flatten()

        nli_loss = self.nli_criterion(nli_out, nli_label)
        nli_out = torch.argmax(nli_out.cpu(), dim=-1).detach().numpy().tolist()

        return evidence_out, evidence_preds, evidence_loss, nli_out, nli_loss


    def evaluate(
        self,
        encoded,
        attention_mask,
        transformation_indices,
        **kwargs
    ):
        batch_indices, indices_by_batch, mask = transformation_indices  # (BATCH_SIZE, N_sep, N_token)

        deberta_out = self.deberta(encoded, attention_mask)[0]  # (BATCH_SIZE, sequence_len, BERT_DIM)
        deberta_tokens = deberta_out[batch_indices, indices_by_batch, :]

        # Represent sentences as weighted self-attention reps.
        sentence_reps, sentence_mask = self.word_attention(deberta_tokens, mask)

        # Logits of the linear evidence predictor.
        evidence_out = self.evidence_linear(sentence_reps)

        att_scores = evidence_out[:, :, 1]  # (BATCH_SIZE, N_sentence)
        valid_scores = evidence_out[:, :, 1] > evidence_out[:, :, 0]

        paragraph_rep = self.sentence_attention(sentence_reps, sentence_mask, att_scores, valid_scores)
        # (BATCH_SIZE, BERT_DIM)

        evidence_preds = (torch.softmax(evidence_out, dim=-1)[:, :, 1] > 0.5).nonzero().flatten()

        nli_out = self.nli_linear(paragraph_rep)  # (BATCH_SIZE, 3)
        nli_out = torch.argmax(nli_out.cpu(), dim=-1).detach().numpy().tolist()

        # Put the encoder back into training mode, since evaluate() is called
        # from inside the training loop.
        self.deberta.train()
        return evidence_out, evidence_preds, nli_out
--------------------------------------------------------------------------------
/experiments/joint/main.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
from sklearn.metrics import f1_score, precision_score, recall_score

from tqdm import tqdm
from torch.utils.data import DataLoader
from transformers import AutoModel, AutoTokenizer, get_cosine_schedule_with_warmup

from model import ModelForSequenceClassification
from prepare_joint import generate_joint_data, generate_masks


DEBERTA_PATH = "microsoft/deberta-v3-large"
device = torch.device('cuda:0')

'''
Torch dataset used for the model.

encoded: DeBERTa-encoded representation of a training instance (claim + all sentences)
labels: evidence labels
nlis: NLI/entailment labels
'''
class CtDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels, nlis):
        self.encoded = encodings
        self.labels = labels
        self.nlis = nlis

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encoded.items()}
        item['labels'] = self.labels[idx]
        item['nli'] = self.nlis[idx]
        return item

    def __len__(self):
        return len(self.labels)


def batch_evidence_label(labels, padding_idx=2):
    # Pad the per-sentence evidence labels to the longest instance in the
    # batch; padded positions get `padding_idx`, which the loss ignores.
    max_sent_len = max([len(label) for label in labels])
    label_matrix = torch.ones(len(labels), max_sent_len) * padding_idx
    label_list = []
    for i, label in enumerate(labels):
        for j, evid in enumerate(label):
            label_matrix[i, j] = int(evid)
        label_list.append([int(evid) for evid in label])
    return label_matrix.long(), label_list


def batch_sentence_mask(masks):
    # Zero-pad the per-instance sentence masks to a common number of sentences.
    max_shape = -1
    for m in masks:
        if m.size(0) > max_shape:
            max_shape = m.size(0)

    padded_batch_mask = list()
    for m in masks:
        if m.size(0) < max_shape:
            expanded = torch.cat((m, torch.zeros((max_shape - m.size(0), m.size(1)))))
        else:
            expanded = m

        expanded = expanded.view(1, expanded.size(0), expanded.size(1))
        padded_batch_mask.append(expanded)

    padded_batch_mask = torch.cat(padded_batch_mask)
    return padded_batch_mask


def token_idx_by_sentence(input_ids, sep_token_id, model_name):
    """
    Compute the token indices matrix of the BERT output.
    input_ids: (batch_size, paragraph_len)
    batch_indices, indices_by_batch, mask: (batch_size, N_sentence, N_token)
    bert_out: (batch_size, paragraph_len, BERT_dim)
    bert_out[batch_indices, indices_by_batch, :]: (batch_size, N_sentence, N_token, BERT_dim)
    """
    padding_idx = -1
    sep_tokens = (input_ids == sep_token_id).bool()
    paragraph_lens = torch.sum(sep_tokens, 1).numpy().tolist()
    indices = torch.arange(sep_tokens.size(-1)).unsqueeze(0).expand(sep_tokens.size(0), -1)
    sep_indices = torch.split(indices[sep_tokens], paragraph_lens)
    paragraph_lens = []
    all_word_indices = []
    for paragraph in sep_indices:
        if "large" in model_name:
            paragraph = paragraph[1:]
        word_indices = [torch.arange(paragraph[i] + 1, paragraph[i + 1] + 1) for i in range(paragraph.size(0) - 2)]
        paragraph_lens.append(len(word_indices))
        all_word_indices.extend(word_indices)

    indices_by_sentence = nn.utils.rnn.pad_sequence(all_word_indices, batch_first=True, padding_value=padding_idx)
    indices_by_sentence_split = torch.split(indices_by_sentence, paragraph_lens)
    indices_by_batch = nn.utils.rnn.pad_sequence(indices_by_sentence_split, batch_first=True, padding_value=padding_idx)
    batch_indices = torch.arange(sep_tokens.size(0)).unsqueeze(-1).unsqueeze(-1).expand(-1, indices_by_batch.size(1), indices_by_batch.size(-1))
    mask = (indices_by_batch >= 0)

    return batch_indices.long(), indices_by_batch.long(), mask.long()
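# Worked example for batch_evidence_label (hypothetical batch): for the
# per-sentence labels ["1", "0", "1"] and ["0", "1"] it returns the matrix
# [[1, 0, 1], [0, 1, 2]] -- the trailing 2 marks padding, which
# CrossEntropyLoss(ignore_index=2) skips during training.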
# Function for evaluating the model output.
def evaluation(model, dataset, data_masks):
    model.eval()
    evidence_predictions = list()
    evidence_labels = list()
    nli_preds = list()
    nli_labels = list()
    batch_size = 4

    with torch.no_grad():
        for i, batch in enumerate(tqdm(DataLoader(dataset, batch_size=batch_size, shuffle=False))):
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']

            # DeBERTa's [SEP] token id is 2 (the same id is used in the
            # training loop below).
            transformation_indices = token_idx_by_sentence(input_ids, 2, "bert")
            transformation_indices = [tensor.to(device) for tensor in transformation_indices]

            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            padded_evidence_label, evidence_label = batch_evidence_label(batch["labels"], padding_idx=2)
            sentence_masks = batch_sentence_mask(data_masks[i * batch_size : i * batch_size + batch_size])
            sentence_masks = sentence_masks.to(device)

            nli_label = batch["nli"].to(device)

            evidence_out, evidence_preds, evidence_loss, nli_out, nli_loss = \
                model(input_ids, attention_mask, nli_label=nli_label,
                      evidence_label=padded_evidence_label.to(device),
                      transformation_indices=transformation_indices)

            batch_labels = batch["labels"]

            batch_selected = (torch.softmax(evidence_out, dim=2)[:, :, 1] > 0.5).tolist()
            for idx in range(len(batch_selected)):
                selected = [1 if l else 0 for l in batch_selected[idx]]
                evidence_predictions.extend(selected)

                true = [1 if c == "1" else 0 for c in batch_labels[idx]]
                evidence_labels.extend(true)

            # Pad whichever side is shorter so both lists stay aligned.
            if len(evidence_labels) > len(evidence_predictions):
                miss = len(evidence_labels) - len(evidence_predictions)
                evidence_predictions.extend([0] * miss)
            elif len(evidence_labels) < len(evidence_predictions):
                miss = len(evidence_predictions) - len(evidence_labels)
                evidence_labels.extend([0] * miss)

            nli_labels.extend(nli_label.cpu().numpy().tolist())
            nli_preds.extend(nli_out)

    nli_f1 = f1_score(nli_labels, nli_preds, average="macro")
    nli_precision = precision_score(nli_labels, nli_preds, average="macro")
    nli_recall = recall_score(nli_labels, nli_preds, average="macro")

    evidence_f1 = f1_score(evidence_labels, evidence_predictions, average="macro")
    evidence_precision = precision_score(evidence_labels, evidence_predictions, average="macro")
    evidence_recall = recall_score(evidence_labels, evidence_predictions, average="macro")
    return nli_f1, nli_precision, nli_recall, evidence_f1, evidence_precision, evidence_recall


# Main training loop.
def train():
    # Load the base model.
    deberta = AutoModel.from_pretrained(DEBERTA_PATH)
    deberta = deberta.to(device)

    # Instantiate the developed model, with a lower learning rate for the
    # task-specific modules than for the encoder.
    model = ModelForSequenceClassification(deberta)
    model.to(device)
    settings = [{'params': model.deberta.parameters(), 'lr': 1e-5}]
    for module in model.extra_modules:
        settings.append({'params': module.parameters(), 'lr': 5e-6})

    # Load the tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(DEBERTA_PATH, model_max_length=512)
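    # NOTE: TRAIN_PATH and TEST_PATH are not defined in this file; they are
    # assumed to point at the train/test split of one fold produced by
    # experiments/prepare_data.py.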
    # Prepare and generate all data for the model.
    joint_train, nli_labels_train, evidence_labels_train = generate_joint_data(TRAIN_PATH)
    joint_dev, nli_labels_dev, evidence_labels_dev = generate_joint_data(TEST_PATH)

    encoded_train = tokenizer(joint_train, return_tensors='pt',
                              truncation=True, add_special_tokens=True, padding=True)
    encoded_dev = tokenizer(joint_dev, return_tensors='pt',
                            truncation=True, add_special_tokens=True, padding=True)
    train_masks = generate_masks(encoded_train)
    dev_masks = generate_masks(encoded_dev)

    train_dataset = CtDataset(encoded_train, evidence_labels_train, nli_labels_train)
    dev_dataset = CtDataset(encoded_dev, evidence_labels_dev, nli_labels_dev)

    # Hyperparameters (defined before the scheduler, which needs `epochs`).
    epochs = 5
    batch_size = 1
    update_step = 10
    NUM_ACCUMULATION_STEPS = 4
    prev_performance = 0

    optimizer = torch.optim.AdamW(settings)
    scheduler = get_cosine_schedule_with_warmup(optimizer, 0, epochs)
    model.train()

    # Main training loop.
    for epoch in range(epochs):
        model.train()

        tq = tqdm(DataLoader(train_dataset, batch_size=batch_size, shuffle=False))
        for i, batch in enumerate(tq):
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']

            # DeBERTa's [SEP] token id is 2.
            transformation_indices = token_idx_by_sentence(input_ids, 2, "bert")
            transformation_indices = [tensor.to(device) for tensor in transformation_indices]

            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            padded_evidence_label, evidence_label = batch_evidence_label(batch["labels"], padding_idx=2)
            sentence_masks = batch_sentence_mask(train_masks[i * batch_size : i * batch_size + batch_size])
            sentence_masks = sentence_masks.to(device)

            nli_label = batch["nli"].to(device)

            evidence_out, evidence_preds, evidence_loss, nli_out, nli_loss = \
                model(input_ids, attention_mask, nli_label=nli_label,
                      evidence_label=padded_evidence_label.to(device),
                      transformation_indices=transformation_indices)
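            # Gradients are accumulated over NUM_ACCUMULATION_STEPS batches to
            # emulate a larger effective batch (batch_size 1 x 4 accumulation
            # steps = 4 instances per optimizer step).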
            evidence_loss *= 6.  # loss ratio between the two tasks
            loss = evidence_loss + nli_loss

            loss = loss / NUM_ACCUMULATION_STEPS
            try:
                loss.backward()
            except RuntimeError:
                # Skip batches that fail the backward pass (e.g. out of memory).
                optimizer.zero_grad()
                continue

            if ((i + 1) % NUM_ACCUMULATION_STEPS == 0) or (i + 1 == len(train_dataset)):
                optimizer.step()
                # Only reset the gradients after an optimizer step, so they
                # accumulate across the intermediate batches.
                optimizer.zero_grad()

            if i % update_step == update_step - 1:
                print(f'Epoch {epoch}, iter {i}, loss: {round(loss.item(), 4)}')

        scheduler.step()


        train_score = evaluation(model, train_dataset, train_masks)
        print(f'Epoch {epoch}, train nli f1 p r: %.4f, %.4f, %.4f, evidence f1 p r: %.4f, %.4f, %.4f' % train_score)

        dev_score = evaluation(model, dev_dataset, dev_masks)
        print(f'Epoch {epoch}, dev nli f1 p r: %.4f, %.4f, %.4f, evidence f1 p r: %.4f, %.4f, %.4f' % dev_score)

        # Track the product of dev NLI F1 and dev evidence F1; keep the
        # checkpoint whenever it improves.
        dev_perf = dev_score[0] * dev_score[3]
        print(dev_perf)
        if dev_perf >= prev_performance:
            torch.save(model.state_dict(), "checkpoint.model")
            prev_performance = dev_perf
            print("New model saved.")
        else:
            print("Skip saving model.")
--------------------------------------------------------------------------------
/LICENSE-CC-BY-NC-ND:
--------------------------------------------------------------------------------
Attribution-NonCommercial-NoDerivatives 4.0 International

=======================================================================

Creative Commons Corporation ("Creative Commons") is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an "as-is" basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.

Using Creative Commons Public Licenses

Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.

     Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright.
     More considerations for licensors: wiki.creativecommons.org/Considerations_for_licensors

     Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor's permission is not necessary for any reason--for example, because of any applicable exception or limitation to copyright--then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. More considerations for the public: wiki.creativecommons.org/Considerations_for_licensees

=======================================================================

Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License

By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.


Section 1 -- Definitions.

  a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.

  b. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.

  c. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.

  d. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.
  e. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License.

  f. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.

  g. Licensor means the individual(s) or entity(ies) granting rights under this Public License.

  h. NonCommercial means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange.

  i. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.

  j. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.

  k. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning.


Section 2 -- Scope.

  a. License grant.

       1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:

            a. reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and

            b. produce and reproduce, but not Share, Adapted Material for NonCommercial purposes only.

       2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.

       3. Term. The term of this Public License is specified in Section 6(a).

       4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.
       5. Downstream recipients.

            a. Offer from the Licensor -- Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.

            b. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.

       6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).

  b. Other rights.

       1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.

       2. Patent and trademark rights are not licensed under this Public License.

       3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes.


Section 3 -- License Conditions.

Your exercise of the Licensed Rights is expressly made subject to the following conditions.

  a. Attribution.

       1. If You Share the Licensed Material, You must:

            a. retain the following if it is supplied by the Licensor with the Licensed Material:

                 i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);

                ii. a copyright notice;

               iii. a notice that refers to this Public License;

                iv. a notice that refers to the disclaimer of warranties;

                 v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;

            b. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and

            c. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.

          For the avoidance of doubt, You do not have permission under this Public License to Share Adapted Material.
       2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.

       3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.


Section 4 -- Sui Generis Database Rights.

Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:

  a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only and provided You do not Share Adapted Material;

  b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and

  c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.

For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.


Section 5 -- Disclaimer of Warranties and Limitation of Liability.

  a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.

  b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.

  c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.


Section 6 -- Term and Termination.

  a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.

  b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:
       1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or

       2. upon express reinstatement by the Licensor.

     For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.

  c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.

  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.


Section 7 -- Other Terms and Conditions.

  a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.

  b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.


Section 8 -- Interpretation.

  a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.

  b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.

  c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.

  d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.

=======================================================================

Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the "Licensor". The text of the Creative Commons public licenses is dedicated to the public domain under the CC0 Public Domain Dedication. Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark "Creative Commons" or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.
Creative Commons may be contacted at creativecommons.org.
--------------------------------------------------------------------------------