├── .gitignore ├── .gitmodules ├── README.md ├── analysis ├── __init__.py ├── cherry_picker.py ├── cox.py ├── db_configs.txt ├── decision_analysis.py ├── dumb_clf.py ├── extract_event_data.py ├── feature_agreement.py ├── feature_analysis.py ├── inter_annotator_agreement.py ├── krippendorff.py ├── nonparametric_survival.py └── survival.R ├── configurations.py ├── data_dump └── MongoDump.zip ├── environment.yml ├── run.py └── templates ├── README.md ├── __init__.py ├── public ├── css │ ├── main.css │ └── radio-button.css └── img │ ├── bot_blue.jpg │ ├── bot_green.jpg │ ├── bot_red.jpg │ └── bot_yellow.jpg ├── src ├── __init__.py ├── api │ ├── __init__.py │ ├── dialogue.py │ ├── dialogue_system_names.py │ └── leaderboard.py ├── mongo_client.py ├── packaging │ ├── __init__.py │ ├── dump_package_uri.py │ ├── naive_strategy.py │ └── ratio_strategy.py ├── scoring_utils.py ├── segment_analysis │ ├── __init__.py │ ├── annotation_statistics.py │ ├── annotator_scores.py │ ├── fooling_analysis.py │ ├── ranking_significance.py │ ├── segmented_bootstrap_sampling.py │ ├── win_function.py │ └── win_significance.py ├── utils.py └── views.py └── static ├── .babelrc ├── __init__.py ├── index.html ├── js ├── api_client.jsx ├── components │ ├── AnnotationForm.jsx │ ├── ContinuousSlider.jsx │ ├── Dialogue.jsx │ ├── DialogueTurn.jsx │ ├── DialougeContainer.jsx │ ├── DialougeDomainFilter.jsx │ ├── DialougeList.jsx │ ├── DialougePackageContainer.jsx │ ├── DialougeSystemFilter.jsx │ ├── EntityForm.jsx │ ├── FinalRatingForm.jsx │ ├── FinalRatingFormSlider.jsx │ ├── Instructions.jsx │ ├── Leaderboard.jsx │ ├── MaxPackages.jsx │ ├── RandomCode.jsx │ ├── SingleAnnotationForm.jsx │ └── legacy │ │ ├── RandomCode.jsx │ │ └── __init__.py ├── index.jsx └── routes.jsx ├── package.json └── webpack.config.js /.gitignore: -------------------------------------------------------------------------------- 1 | config/annotation_app.json 2 | 3 | templates/static/node_modules/ 4 | 5 | 6 | **/bundle.js 7 | */bundle.js 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | **/__pycache__/ 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # IPython Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | __pycache__ 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | .idea/ 99 | 100 | auto_judge/simple_transformer/outputs/* 101 | auto_judge/simple_transformer/cache_dir/* 102 | auto_judge/simple_transformer/runs/* 103 | 104 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "elote"] 2 | path = elote 3 | url = https://github.com/wdm0006/elote.git 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spot The Bot: A Robust and Efficient Framework for the Evaluation of Conversational Dialogue Systems 2 | 3 | ## General Installation 4 | You need to install a [MongoDB](https://www.mongodb.com/try/download/community) v4.2.9 Server somewhere. All the conversation data is stored there. 5 | Unzip `data_dump/MongoDump.zip` and then import the files into your MongoDB (repeat this process for all 9 files): 6 | 7 | ```bash 8 | mongoimport --db auto_judge_final --collection annotated-dialogues-full-convai2 --file annotated-dialogues-full-convai2.json --jsonArray --username --password 9 | ``` 10 | 11 | You need to install R... 
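For convenience, a sketch of the two setup steps above is shown below. It assumes each JSON file in the unzipped dump is named after its target collection (as in the `mongoimport` example above) and that the R packages listed under Survival Analysis below are installable from CRAN; `<your_user>` and `<your_password>` are placeholders for your MongoDB credentials.

```bash
# Sketch: import all 9 dump files, assuming one <collection>.json file per collection
for f in *.json; do
  mongoimport --db auto_judge_final --collection "${f%.json}" --file "$f" \
    --jsonArray --username <your_user> --password <your_password>
done

# Sketch: install the R packages used by analysis/survival.R (see Survival Analysis below)
Rscript -e 'install.packages(c("survival", "survminer", "glrt", "icenReg"), repos = "https://cloud.r-project.org")'
```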
12 | 13 | You need to install Python 3.7; we suggest that you use [Anaconda](https://www.anaconda.com/products/individual): 14 | 15 | ```bash 16 | $ conda env create -f environment.yml 17 | ``` 18 | 19 | Adapt the `config/annotation_app.json` file as follows: 20 | ```json 21 | { 22 | "host": "ip_address of your MongoDB Server", 23 | "port": "port of mongodb", 24 | "user": "mongodb user name", 25 | "password": "password of the mongodb user", 26 | "database_name": "auto_judge_final", 27 | "package_collection_name": "packed-dialogues-full-{domain_name}", 28 | "sampled_collection_name": "sampled-dialogues-full-{domain_name}", 29 | "labelled_collection_name": "annotated-dialogues-full-{domain_name}", 30 | "local_port": 5003, 31 | "max_package_per_user": 3 32 | } 33 | ``` 34 | 35 | ## Run the Annotation Tool 36 | After cloning the repository, `cd` into it: 37 | 38 | To run the annotation tool: 39 | ```bash 40 | $ python run.py 41 | ``` 42 | 43 | You can access the tool at `localhost:5003` 44 | 45 | ## Ranking 46 | After cloning the repository, `cd` into it: 47 | 48 | To get the rankings based on bootstrap sampling (Table 1): 49 | ```bash 50 | $ python templates\src\segment_analysis\segmented_bootstrap_sampling.py 51 | ``` 52 | 53 | To get the pairwise win rates (Table 1): 54 | ```bash 55 | $ python templates\src\segment_analysis\win_function.py 56 | ``` 57 | To perform the stability experiment (Figure 3a): 58 | ```bash 59 | $ python templates\src\segment_analysis\ranking_significance.py 60 | ``` 61 | 62 | To perform the leave-one-out experiment (Figure 3b): 63 | ```bash 64 | $ python templates\src\segment_analysis\ranking_significance.py -lo 1 65 | ``` 66 | 67 | ## Survival Analysis 68 | 69 | The survival analysis is implemented in R and uses the following packages: 70 | * [survival](https://cran.r-project.org/web/packages/survival/index.html) 71 | * [survminer](https://cran.r-project.org/web/packages/survminer/index.html) (needs a Fortran compiler to install) 72 | * [glrt](https://rdrr.io/cran/glrt/man/glrt-package.html) 73 | * [icenReg](https://cran.r-project.org/web/packages/icenReg/index.html) 74 | 75 | To export the survival data from your annotations, run `python -m analysis.extract_event_data`. 76 | This will create a CSV file `event_data.csv`, which is read by the R script. 77 | 78 | Finally, run the R script at `analysis/survival.R`. 79 | 80 | ## IAA 81 | 82 | To run the label agreement analysis on, e.g.,
the convai2 annotations, run 83 | 84 | ```back 85 | $ python analysis/inter_annotator_agreement.py sampled-dialogues-full-convai2.json 86 | ``` 87 | 88 | The annotations are stored in `data_dump/MongoDump.zip` 89 | 90 | ## References 91 | If you use this code, please cite us: 92 | 93 | ``` 94 | @inproceedings{deriu2020spot_the_bot, 95 | title = {{Spot The Bot: A Robust and Efficient Framework for the Evaluation of Conversational Dialogue Systems}}, 96 | author = {Deriu, Jan and Tuggener, Don and von D{\"a}niken, Pius and Campos, Jon Ander and Rodrigo, Alvaro and, Belkacem, Thiziri and Soroa, Aitor and Agirre, Eneko and Cieliebak, Mark}, 97 | booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)}, 98 | address = {Online}, 99 | year = {2020}, 100 | } 101 | -------------------------------------------------------------------------------- /analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/analysis/__init__.py -------------------------------------------------------------------------------- /analysis/cherry_picker.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import Dict, Union, Optional 3 | from collections import defaultdict 4 | import random 5 | 6 | 7 | def cherry_pick(convos: Dict, entity0: Optional[str] = None, entity1: Optional[str] = None) -> None: 8 | convo_list = list(convos.values()) 9 | # random.shuffle(convo_list) 10 | 11 | for convo in convo_list: 12 | bot_0 = convo['system_type0'] 13 | bot_1 = convo['system_type1'] 14 | if entity0 and entity0 not in [bot_0, bot_1]: 15 | continue 16 | if entity1 and entity1 not in [bot_0, bot_1]: 17 | continue 18 | 19 | for exchange_id, annotations in convo['annotations'].items(): 20 | cherry = True 21 | for annotation in annotations: 22 | # Require the full convo to be annotated as not bot in all segments by all annotators 23 | #if not annotation['entity0_annotation']['is_human'] is True or not annotation['entity1_annotation']['is_human'] is True: 24 | if annotation['entity0_annotation']['is_human'] is False or \ 25 | annotation['entity1_annotation']['is_human'] is False: 26 | cherry = False 27 | 28 | if cherry and len(annotations) > 1: 29 | print('Annotation on exchange', exchange_id) 30 | for turn in convo['convo']: 31 | print(turn['id'] + ':\t' + turn['text']) 32 | breakpoint() 33 | 34 | 35 | def pick_example_for_segmentation(convos: Dict) -> None: 36 | for _, convo in convos.items(): 37 | bot_0 = convo['system_type0'] 38 | bot_1 = convo['system_type1'] 39 | 40 | if bot_0 == 'human': 41 | continue 42 | 43 | # analyze annotations per segment, choose convo where annotations change 44 | decision_turns = sorted(convo['annotations'].keys()) 45 | bot_0_first_annotation = convo['annotations'][decision_turns[0]][0]['entity0_annotation']['is_human'] 46 | 47 | if bot_0_first_annotation is not False: 48 | bot_0_last_annotation = convo['annotations'][decision_turns[-1]][0]['entity0_annotation']['is_human'] 49 | if bot_0_last_annotation is False: 50 | for decision_turn in decision_turns: 51 | labels_e1 = [a['entity0_annotation']['is_human'] for a in convo['annotations'][decision_turn]] 52 | labels_e2 = [a['entity1_annotation']['is_human'] for a in convo['annotations'][decision_turn]] 53 | for turn in range(decision_turn*2): 54 | print(convo['convo'][turn]['id'] + ':', 
convo['convo'][turn]['text']) 55 | print(bot_0, labels_e1, bot_1, labels_e2) 56 | breakpoint() 57 | 58 | 59 | def pick_segments_w_different_annotations(convos: Dict, shuffle: bool = True, max_prints: int = 10) -> None: 60 | 61 | if shuffle: 62 | convos_list = list(convos.items()) 63 | random.shuffle(convos_list) 64 | convos = dict(convos_list) 65 | 66 | sampled_convos = defaultdict(list) 67 | 68 | for convo_id, convo in convos.items(): 69 | bot_0 = convo['system_type0'] 70 | bot_1 = convo['system_type1'] 71 | 72 | if bot_0 == 'human': 73 | continue 74 | 75 | for decision_turn, annotations in convo['annotations'].items(): 76 | 77 | labels_e1 = set([a['entity0_annotation']['is_human'] for a in convo['annotations'][decision_turn]]) 78 | labels_e2 = set([a['entity1_annotation']['is_human'] for a in convo['annotations'][decision_turn]]) 79 | 80 | if len(labels_e1) == 1 and len(labels_e2) == 1: # Enforce annotator agreement 81 | 82 | if (labels_e1 == {True} and labels_e2 == {False}) or (labels_e1 == {False} and labels_e2 == {True}): 83 | 84 | convo_turns = list() 85 | for turn in range(decision_turn*2): 86 | convo_turns.append(convo['convo'][turn]['id'] + ':\t' + convo['convo'][turn]['text']) 87 | 88 | if labels_e1 == {False}: 89 | sampled_convos[bot_0].append(dict(convo=convo_turns, partner=bot_1, convo_id=convo_id)) 90 | else: 91 | sampled_convos[bot_1].append(dict(convo=convo_turns, partner=bot_0, convo_id=convo_id)) 92 | 93 | for bot, convos in sampled_convos.items(): 94 | convos.sort(key=lambda x: len(x['convo']), reverse=True) 95 | for convo in convos[:max_prints]: 96 | partner = convo['partner'] 97 | print(convo['convo_id']) 98 | print(f'{bot} = bot\t{partner} = human') 99 | for turn in convo['convo']: 100 | print(turn) 101 | print() 102 | 103 | 104 | def print_human_convos(convos: Dict, shuffle: bool = True, max_prints: int = 10) -> None: 105 | if shuffle: 106 | convos_list = list(convos.items()) 107 | random.shuffle(convos_list) 108 | convos = dict(convos_list) 109 | 110 | for convo_id, convo in convos.items(): 111 | bot_0 = convo['system_type0'] 112 | bot_1 = convo['system_type1'] 113 | 114 | if bot_0 == 'human' and bot_1 == 'human': 115 | 116 | print(convo_id) 117 | for turn in convo['convo']: 118 | print(turn['text']) 119 | print() 120 | 121 | 122 | def pick_convos_w_developing_annotations(convos: Dict, shuffle: bool = True, max_print: int = 10) -> None: 123 | 124 | if shuffle: 125 | convos_list = list(convos.items()) 126 | random.shuffle(convos_list) 127 | convos = dict(convos_list) 128 | 129 | printed = 0 130 | 131 | for convo_id, convo in convos.items(): 132 | 133 | if printed > max_print: 134 | break 135 | 136 | bot_0 = convo['system_type0'] 137 | bot_1 = convo['system_type1'] 138 | 139 | if bot_0 == 'human': 140 | continue 141 | 142 | decision_turns = sorted(convo['annotations'].keys()) 143 | label_prog_e1, label_prog_e2 = list(), list() 144 | 145 | for decision_turn in decision_turns: 146 | 147 | labels_e1 = set([a['entity0_annotation']['is_human'] for a in convo['annotations'][decision_turn]]) 148 | labels_e2 = set([a['entity1_annotation']['is_human'] for a in convo['annotations'][decision_turn]]) 149 | 150 | label_prog_e1.append(labels_e1) 151 | label_prog_e2.append(labels_e2) 152 | 153 | if len(label_prog_e1[0]) == 1 and list(label_prog_e1[0])[0] is not False: 154 | 155 | if len(label_prog_e1[1]) == 1 and list(label_prog_e1[1])[0] is False: 156 | print(bot_0, label_prog_e1, convo_id, '\n') 157 | for turn in range(min((decision_turns[-1] * 2), len(convo['convo']))): 158 | 
print(convo['convo'][turn]['id'] + ':\t' + convo['convo'][turn]['text']) 159 | if (turn + 1) / 2 in decision_turns: 160 | print('-' * 10) 161 | print() 162 | printed += 1 163 | # breakpoint() 164 | 165 | elif len(label_prog_e1[-1]) == 1 and list(label_prog_e1[-1])[-1] is False: 166 | print(bot_0, label_prog_e1, convo_id, '\n') 167 | for turn in range(min((decision_turns[-1] * 2), len(convo['convo']))): 168 | print(convo['convo'][turn]['id'] + ':\t' + convo['convo'][turn]['text']) 169 | if (turn + 1) / 2 in decision_turns: 170 | print('-' * 10) 171 | print() 172 | printed += 1 173 | # breakpoint() 174 | 175 | if len(label_prog_e2[0]) == 1 and list(label_prog_e2[0])[0] is not False: 176 | 177 | if len(label_prog_e2[1]) == 1 and list(label_prog_e2[1])[0] is False: 178 | print(bot_1, label_prog_e2, convo_id, '\n') 179 | for turn in range(min((decision_turns[-1] * 2) , len(convo['convo']))): 180 | print(convo['convo'][turn]['id'] + ':\t' + convo['convo'][turn]['text']) 181 | if (turn + 1) / 2 in decision_turns: 182 | print('-' * 10) 183 | print() 184 | printed += 1 185 | # breakpoint() 186 | 187 | elif len(label_prog_e2[-1]) == 1 and list(label_prog_e2[-1])[0] is False: 188 | print(bot_1, label_prog_e2, convo_id, '\n') 189 | for turn in range(min((decision_turns[-1] * 2), len(convo['convo']))): 190 | print(convo['convo'][turn]['id'] + ':\t' + convo['convo'][turn]['text']) 191 | if (turn + 1) / 2 in decision_turns: 192 | print('-' * 10) 193 | print() 194 | printed += 1 195 | # breakpoint() 196 | 197 | 198 | if __name__ == '__main__': 199 | import os, sys, pickle, json 200 | sys.path.append('./') 201 | from templates.src.segment_analysis.annotator_scores import get_all_annotated_convos 202 | print('Downloading data') 203 | data = get_all_annotated_convos() 204 | 205 | pick_convos_w_developing_annotations(data) 206 | 207 | # print_human_convos(data) 208 | 209 | # pick_segments_w_different_annotations(data, max_prints=10) 210 | 211 | """ 212 | pick_example_for_segmentation(data) 213 | 214 | e1 = 'bert_rank' 215 | e2 = 'huggingface' 216 | # Specifiy desired annotations 217 | ann1 = True # True = human, False = bot, None = undecided 218 | ann2 = True 219 | cherry_pick(data, e1, e2, ann1, ann2) 220 | """ -------------------------------------------------------------------------------- /analysis/cox.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | import matplotlib.pyplot as plt 5 | 6 | from lifelines import CoxPHFitter 7 | 8 | from analysis.extract_event_data import fetch_event_data 9 | from templates.src.scoring_utils import create_black_list 10 | 11 | try: 12 | use_blacklist = sys.argv[1] == 'blacklist' 13 | except IndexError: 14 | use_blacklist = True 15 | 16 | if use_blacklist: 17 | blacklisted = set(create_black_list()) 18 | else: 19 | blacklisted = set() 20 | 21 | EVENT_DATA = fetch_event_data() 22 | EVENT_DATA = EVENT_DATA[~EVENT_DATA['user'].isin(blacklisted)] 23 | EVENT_DATA = EVENT_DATA[ 24 | ['domain_system', 'spotted', 'time', 'fluency', 'sensible', 'specific'] 25 | ] 26 | # EVENT_DATA['inv_specific'] = 50. / (1. + EVENT_DATA.specific) 27 | # EVENT_DATA['inv_fluency'] = 50. / (1. + EVENT_DATA.fluency) 28 | # EVENT_DATA['inv_sensible'] = 50. / (1. 
+ EVENT_DATA.sensible) 29 | 30 | for system_domain in EVENT_DATA.domain_system.unique(): 31 | print(system_domain) 32 | cph = CoxPHFitter() 33 | # to_drop = ['fluency', 'specific', 'sensible', 'domain_system'] 34 | to_drop = ['domain_system'] 35 | data = EVENT_DATA.loc[EVENT_DATA['domain_system'] == system_domain].drop(columns=to_drop) 36 | cph.fit( 37 | data, 38 | duration_col='time', 39 | event_col='spotted', 40 | robust=True, 41 | show_progress=True, 42 | step_size=0.1, 43 | ) 44 | 45 | # cph.check_assumptions(data, show_plots=True) 46 | # plt.show() 47 | 48 | cph.print_summary() 49 | 50 | cph.plot() 51 | plt.title(f"Predictors {system_domain}") 52 | # plt.show() 53 | plt.savefig(f"predictors_{'-'.join(system_domain.split('/'))}.png") 54 | plt.close() 55 | print('*' * 80) 56 | 57 | -------------------------------------------------------------------------------- /analysis/db_configs.txt: -------------------------------------------------------------------------------- 1 | "package_collection_name": "packed-dialogues-full-dailydialog", 2 | "sampled_collection_name": "sampled-dialogues-full-dailydialog", 3 | "labelled_collection_name": "annotated-dialogues-full-dailydialog" 4 | 5 | "package_collection_name": "packed-dialogues-full-empathetic", 6 | "sampled_collection_name": "sampled-dialogues-full-empathetic", 7 | "labelled_collection_name": "annotated-dialogues-full-empathetic" 8 | 9 | "package_collection_name": "packed-dialogues-full-convai2", 10 | "sampled_collection_name": "sampled-dialogues-full-convai2", 11 | "labelled_collection_name": "annotated-dialogues-full-convai2" -------------------------------------------------------------------------------- /analysis/dumb_clf.py: -------------------------------------------------------------------------------- 1 | 2 | from sklearn.model_selection import train_test_split 3 | from sklearn.feature_extraction.text import TfidfVectorizer 4 | from sklearn.svm import LinearSVC 5 | from sklearn.pipeline import Pipeline 6 | from sklearn.metrics import classification_report, plot_confusion_matrix 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | from templates.src.scoring_utils import get_all_annotated_convos 11 | 12 | SEED = 0xDEADBEEF 13 | 14 | CONVOS = list(get_all_annotated_convos().values()) 15 | 16 | TRAIN, TEST = train_test_split( 17 | CONVOS, 18 | test_size=0.2, 19 | random_state=SEED, 20 | shuffle=True, 21 | stratify=[ 22 | '_'.join(sorted([convo['system_type0'], convo['system_type1']])) 23 | for convo in CONVOS 24 | ], 25 | ) 26 | 27 | 28 | def pipeline(): 29 | return Pipeline( 30 | steps=[ 31 | ('vec', TfidfVectorizer(ngram_range=(1, 3), sublinear_tf=True)), 32 | ('clf', LinearSVC(random_state=SEED, class_weight='balanced')), 33 | ] 34 | ) 35 | 36 | 37 | def extract_clf_data(dataset, n_turns): 38 | texts = [] 39 | labels = [] 40 | 41 | for convo in dataset: 42 | label1 = convo['system_type0'] 43 | turns1 = [ 44 | entry['text'] 45 | for entry in convo['convo'] 46 | if entry['id'] == label1 47 | ] 48 | text1 = '\n'.join(turns1[:n_turns]) 49 | 50 | label2 = convo['system_type1'] 51 | turns2 = [ 52 | entry['text'] 53 | for entry in convo['convo'] 54 | if entry['id'] == label2 55 | ] 56 | text2 = '\n'.join(turns2[:n_turns]) 57 | 58 | texts.append(text1) 59 | labels.append(label1) 60 | texts.append(text2) 61 | labels.append(label2) 62 | 63 | return { 64 | 'x': texts, 65 | 'y': labels, 66 | } 67 | 68 | 69 | def experiment(n_turns, binary_task=False): 70 | 71 | train = extract_clf_data(TRAIN, n_turns=n_turns) 72 | test = extract_clf_data(TEST, 
n_turns=n_turns) 73 | if binary_task: 74 | train['y'] = [ 75 | 'human' if label == 'human' else 'bot' 76 | for label in train['y'] 77 | ] 78 | test['y'] = [ 79 | 'human' if label == 'human' else 'bot' 80 | for label in test['y'] 81 | ] 82 | 83 | clf = pipeline() 84 | 85 | clf.fit(train['x'], train['y']) 86 | 87 | print("n turns:\t", n_turns) 88 | print(classification_report(y_true=test['y'], y_pred=clf.predict(test['x']))) 89 | print('*' * 80) 90 | 91 | display = plot_confusion_matrix( 92 | estimator=clf, 93 | X=test['x'], 94 | y_true=test['y'] 95 | ) 96 | display.plot() 97 | name = "binary" if binary_task else 'all' 98 | plt.savefig(f"confusions_{n_turns}_{name}.png") 99 | 100 | return clf 101 | 102 | 103 | if __name__ == '__main__': 104 | for turns in range(1, 10): 105 | experiment(turns, binary_task=False) 106 | -------------------------------------------------------------------------------- /analysis/extract_event_data.py: -------------------------------------------------------------------------------- 1 | 2 | from templates.src.scoring_utils import ( 3 | get_annotator_names, 4 | get_annotations_for_user, 5 | get_annotated_convos_for_user, 6 | combine_annotations_and_convos_for_user, 7 | create_black_list 8 | ) 9 | from templates.src.segment_analysis.annotator_scores import get_all_annotated_convos 10 | from templates.src.segment_analysis.fooling_analysis import name_mapping_domain 11 | 12 | 13 | import pandas as pd 14 | import numpy as np 15 | 16 | 17 | def convo_type(system1, system2): 18 | if (system1 == 'human') and (system2 == 'human'): 19 | return 'human-human' 20 | elif (system1 == 'human') or (system2 == 'human'): 21 | return 'human-bot' 22 | else: 23 | return 'bot-bot' 24 | 25 | 26 | def turn_to_n_utterances(n_turns, bot_num): 27 | if bot_num == 0: 28 | return int((n_turns + 1) / 2) 29 | elif bot_num == 1: 30 | return int(n_turns / 2) 31 | else: 32 | raise ValueError(f"unknown entity number {bot_num}, use one of {{0, 1}}") 33 | 34 | 35 | def spotted(is_human_annotation): 36 | def survived(a): 37 | if a in {True, None}: 38 | return True 39 | else: 40 | return False 41 | 42 | return not survived(is_human_annotation) 43 | 44 | 45 | def feature_annotation_to_num(ann0, ann1): 46 | 47 | if ann0 and ann1: 48 | return 0, 0 49 | elif ann0 and (not ann1): 50 | return 1, -1 51 | elif (not ann0) and ann1: 52 | return -1, 1 53 | else: 54 | raise ValueError(f"this shouldn't happen: {ann0} {ann1}") 55 | 56 | 57 | def fetch_event_data(): 58 | annotator_info = get_annotator_names() 59 | 60 | raw = [] 61 | for name in annotator_info: 62 | for _, (annotation, convo) in combine_annotations_and_convos_for_user( 63 | get_annotations_for_user(name), 64 | get_annotated_convos_for_user(annotator_info[name]), 65 | ).items(): 66 | entry0 = { 67 | 'user': name, 68 | 'convo_id': annotation['convo_id'], 69 | 'convo_type': convo_type(convo['system_type0'], convo['system_type1']), 70 | 'system': convo['system_type0'], 71 | 'domain': convo['domain_name'], 72 | 'domain_system': convo['domain_name'] + '/' + convo['system_type0'], 73 | # 'spotted': int(not annotation['entity0_annotation']['is_human']), 74 | 'spotted': spotted(annotation['entity0_annotation']['is_human']), 75 | 'time': turn_to_n_utterances(annotation['entity0_annotation']['decision_turn'], bot_num=0), 76 | 'fluency': int(annotation['entity0_annotation']['fluencyValue']), 77 | 'sensible': int(annotation['entity0_annotation']['sensitivenessValue']), 78 | 'specific': int(annotation['entity0_annotation']['specificityValue']), 79 | } 80 | entry1 = { 
81 | 'user': name, 82 | 'convo_id': annotation['convo_id'], 83 | 'convo_type': convo_type(convo['system_type0'], convo['system_type1']), 84 | 'system': convo['system_type1'], 85 | 'domain': convo['domain_name'], 86 | 'domain_system': convo['domain_name'] + '/' + convo['system_type1'], 87 | # 'spotted': int(not annotation['entity1_annotation']['is_human']), 88 | 'spotted': spotted(annotation['entity1_annotation']['is_human']), 89 | 'time': turn_to_n_utterances(annotation['entity1_annotation']['decision_turn'], bot_num=1), 90 | 'fluency': int(annotation['entity1_annotation']['fluencyValue']), 91 | 'sensible': int(annotation['entity1_annotation']['sensitivenessValue']), 92 | 'specific': int(annotation['entity1_annotation']['specificityValue']), 93 | } 94 | 95 | raw.append(entry0) 96 | raw.append(entry1) 97 | 98 | return pd.DataFrame.from_records(raw) 99 | 100 | 101 | class IDMapping: 102 | 103 | def __getitem__(self, item): 104 | return item 105 | 106 | 107 | def fetch_segmented(name_mapping=IDMapping()): 108 | data = get_all_annotated_convos() 109 | 110 | records = [] 111 | for convo_data in data.values(): 112 | system0 = name_mapping[convo_data['system_type0']] 113 | system1 = name_mapping[convo_data['system_type1']] 114 | for exchange, annotations in convo_data['annotations'].items(): 115 | for ann in annotations: 116 | 117 | fluent0, fluent1 = feature_annotation_to_num( 118 | ann['entity0_annotation']['fluencyValue'], 119 | ann['entity1_annotation']['fluencyValue'], 120 | ) 121 | sensible0, sensible1 = feature_annotation_to_num( 122 | ann['entity0_annotation']['sensitivenessValue'], 123 | ann['entity1_annotation']['sensitivenessValue'], 124 | ) 125 | specific0, specific1 = feature_annotation_to_num( 126 | ann['entity0_annotation']['specificityValue'], 127 | ann['entity1_annotation']['specificityValue'], 128 | ) 129 | 130 | spotted0 = spotted(ann['entity0_annotation']['is_human']) 131 | entry0 = { 132 | 'time_left': 0 if spotted0 else exchange, 133 | 'time_right': exchange if spotted0 else np.inf, 134 | 'censor_type': 3 if spotted0 else 0, # for R, 0 means right censored, 3 means interval censored 135 | 'system': system0, 136 | 'fluent': fluent0, 137 | 'sensible': sensible0, 138 | 'specific': specific0, 139 | 140 | } 141 | 142 | spotted1 = spotted(ann['entity1_annotation']['is_human']) 143 | entry1 = { 144 | 'time_left': 0 if spotted1 else exchange, 145 | 'time_right': exchange if spotted1 else np.inf, 146 | 'censor_type': 3 if spotted1 else 0, 147 | 'system': system1, 148 | 'fluent': fluent1, 149 | 'sensible': sensible1, 150 | 'specific': specific1, 151 | } 152 | 153 | records.append(entry0) 154 | records.append(entry1) 155 | 156 | frame = pd.DataFrame.from_records(records) 157 | 158 | sys_map = {sys: ix for ix, sys in enumerate(frame.system.unique())} 159 | frame['system_id'] = frame.system.apply(lambda sys: sys_map[sys]) 160 | 161 | return frame 162 | 163 | 164 | def export_to_csv(path): 165 | black_list = create_black_list() 166 | 167 | data = fetch_event_data() 168 | data = data[~data['user'].isin(black_list)] 169 | 170 | data['last_alive'] = data.time * ~data.spotted 171 | data['seen_dead'] = data.time * data.spotted 172 | data['event_type'] = 3 * data.spotted 173 | 174 | out = data[['last_alive', 'seen_dead', 'event_type', 'system']] 175 | 176 | out.to_csv(path) 177 | 178 | 179 | if __name__ == '__main__': 180 | df = fetch_segmented() 181 | df.to_csv('event_data.csv') 182 | 183 | -------------------------------------------------------------------------------- 
/analysis/feature_agreement.py: -------------------------------------------------------------------------------- 1 | 2 | from templates.src.scoring_utils import ( 3 | get_all_annotated_convos, 4 | create_black_list, 5 | ) 6 | 7 | from analysis.krippendorff import KrippenDorff 8 | 9 | 10 | def _extract_annotation(left_entity_ann: bool, right_entity_ann: bool): 11 | if (left_entity_ann == True) and (right_entity_ann == True): # verbose for clarity 12 | return '=', 13 | elif (left_entity_ann == True) and (right_entity_ann == False): 14 | return '>', 15 | elif (left_entity_ann == False) and (right_entity_ann == True): 16 | return '<', 17 | else: 18 | raise ValueError(f"cannot handle case left: {left_entity_ann}, right {right_entity_ann}") 19 | 20 | 21 | def _extract_annotations(blacklisted): 22 | 23 | bot_pair_to_annotations = {} 24 | for convo_id, convo in get_all_annotated_convos().items(): 25 | ent0_type = convo['system_type0'] 26 | ent1_type = convo['system_type1'] 27 | 28 | if ent0_type <= ent1_type: 29 | bot_pair = (ent0_type, ent1_type) 30 | left_annotation = 'entity0_annotation' 31 | right_annotation = 'entity1_annotation' 32 | else: 33 | bot_pair = (ent1_type, ent0_type) 34 | left_annotation = 'entity1_annotation' 35 | right_annotation = 'entity0_annotation' 36 | 37 | annotations = [ 38 | { 39 | 'fluency': _extract_annotation( 40 | left_entity_ann=ann[left_annotation]['fluencyValue'], 41 | right_entity_ann=ann[right_annotation]['fluencyValue'], 42 | ), 43 | 'sensitiveness': _extract_annotation( 44 | left_entity_ann=ann[left_annotation]['sensitivenessValue'], 45 | right_entity_ann=ann[right_annotation]['sensitivenessValue'], 46 | ), 47 | 'specificity': _extract_annotation( 48 | left_entity_ann=ann[left_annotation]['specificityValue'], 49 | right_entity_ann=ann[right_annotation]['specificityValue'], 50 | ), 51 | } 52 | for ann in convo['annotations'] 53 | if ann['user_name'] not in blacklisted 54 | ] 55 | 56 | if len(annotations) > 0: 57 | if bot_pair_to_annotations.get(bot_pair) is None: 58 | bot_pair_to_annotations[bot_pair] = [] 59 | 60 | bot_pair_to_annotations[bot_pair].append(annotations) 61 | 62 | return { 63 | bot_pair: { 64 | 'fluency': [[a['fluency'] for a in ann_list] for ann_list in anns], 65 | 'sensitiveness': [[a['sensitiveness'] for a in ann_list] for ann_list in anns], 66 | 'specificity': [[a['specificity'] for a in ann_list] for ann_list in anns], 67 | } 68 | for bot_pair, anns in bot_pair_to_annotations.items() 69 | } 70 | 71 | 72 | def dist_fn(ann_tup1, ann_tup2): 73 | assert len(ann_tup1) == 1 74 | assert len(ann_tup2) == 1 75 | 76 | ann1 = ann_tup1[0] 77 | ann2 = ann_tup2[0] 78 | 79 | if ann1 == ann2: 80 | return 0.0 81 | else: 82 | if (ann1 == '=') or (ann2 == '='): 83 | return 0.5 84 | else: 85 | return 1.0 86 | 87 | 88 | def main(use_blacklist): 89 | 90 | blacklisted = set(create_black_list()) if use_blacklist else set() 91 | 92 | annotation_dict = _extract_annotations(blacklisted) 93 | 94 | for bot_pair in sorted(annotation_dict.keys()): 95 | left, right = bot_pair 96 | print(f"{left} vs {right}:") 97 | for feature in ['fluency', 'sensitiveness', 'specificity']: 98 | alpha = KrippenDorff(dist_fun=dist_fn).alpha(annotation_dict[bot_pair][feature])['alpha'] 99 | print(f'\t{feature}:\t{alpha}') 100 | 101 | 102 | if __name__ == '__main__': 103 | import sys 104 | try: 105 | blacklist = sys.argv[1] == 'blacklist' 106 | except IndexError: 107 | blacklist = True 108 | 109 | main(use_blacklist=blacklist) 110 | 
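A usage sketch for the feature-agreement script above, assuming it is run from the repository root and that `config/annotation_app.json` points to your populated MongoDB:

```bash
# Per-feature Krippendorff alpha with blacklisted annotators excluded (also the default when no argument is given)
python -m analysis.feature_agreement blacklist

# Any other first argument keeps all annotators (no blacklist)
python -m analysis.feature_agreement all
```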
-------------------------------------------------------------------------------- /analysis/inter_annotator_agreement.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from collections import defaultdict, Counter 4 | from typing import Dict, List 5 | 6 | 7 | def feature_agreement(convos: List) -> None: 8 | 9 | def get_feature_decision(annotation: Dict) -> str: 10 | if annotation['entity0_annotation'][feature] is True and annotation['entity1_annotation'][feature] is True: 11 | decision = 'draw' 12 | elif annotation['entity0_annotation'][feature] is True and annotation['entity1_annotation'][feature] is False: 13 | decision = 'bot_0' 14 | elif annotation['entity0_annotation'][feature] is False and annotation['entity1_annotation'][feature] is True: 15 | decision = 'bot_1' 16 | else: 17 | breakpoint() 18 | return decision 19 | 20 | features = {'fluencyValue', 'sensitivenessValue', 'specificityValue'} 21 | agreement_per_feature = defaultdict(list) 22 | annotations_per_feature = defaultdict(list) 23 | draw_count, annotation_count = 0, 0 24 | 25 | for convo in convos: 26 | 27 | for decision_turn, annotations in convo['annotations'].items(): 28 | 29 | if len(annotations) != 2: # Shouldn't happen 30 | continue 31 | for feature in features: 32 | decisions = [get_feature_decision(annotation) for annotation in annotations] 33 | # if 'draw' in decisions: continue 34 | draw_count += decisions.count('draw') 35 | annotation_count += len(decisions) 36 | agreement = 1 if len(set(decisions)) == 1 else 0 37 | agreement_per_feature[feature].append(agreement) 38 | annotations_per_feature[feature].extend(decisions) 39 | 40 | for feature, agreement in agreement_per_feature.items(): 41 | print(f'{feature}\t{round(np.mean(agreement), 2)}\t{len(agreement)}') 42 | print('Annotations:', annotation_count) 43 | print('Draws:', draw_count, f'({round(100*draw_count / annotation_count)}%)') 44 | print('Draws per feature') 45 | for feature, annotations in annotations_per_feature.items(): 46 | print(feature, round(100*annotations.count('draw') / len(annotations), 2), '%') 47 | breakpoint() 48 | 49 | 50 | def label_agreement(convos: List) -> None: 51 | 52 | decision_agreements = defaultdict(list) 53 | decision_agreements_per_turn = defaultdict(lambda: defaultdict(list)) 54 | label_pairs_per_bot = defaultdict(Counter) # label_pairs[bot][(label1, label2)] = 11 55 | label_fractions_per_bot = defaultdict(lambda: defaultdict(list)) 56 | 57 | label_names = {True: 'human', False: 'bot', None: 'unsure'} 58 | for convo in convos: 59 | 60 | bot_0 = convo['system_type0'] # change to system_name0 for domain-specificity 61 | bot_1 = convo['system_type1'] 62 | 63 | # if not {bot_1, bot_0} == {'Human', 'Cleverbot'}: continue # Only sample specific convos 64 | # if 'Cleverbot' in {bot_0, bot_1} or 'Mitsuku' in {bot_0, bot_1}: continue # exclude certain convos 65 | 66 | for decision_turn, annotations in convo['annotations'].items(): 67 | decisions_entity0, decisions_entity1 = list(), list() 68 | 69 | if len(annotations) != 2: 70 | continue 71 | 72 | for annotation in annotations: 73 | decisions_entity0.append(annotation['entity0_annotation']['is_human']) 74 | decisions_entity1.append(annotation['entity1_annotation']['is_human']) 75 | 76 | decision_agreements[bot_0].append(len(set(decisions_entity0))) 77 | decision_agreements[bot_1].append(len(set(decisions_entity1))) 78 | decision_agreements_per_turn[bot_0][decision_turn].append(len(set(decisions_entity0))) 79 | 
decision_agreements_per_turn[bot_1][decision_turn].append(len(set(decisions_entity1))) 80 | 81 | named_decisions_entity0 = [label_names[label] for label in decisions_entity0] 82 | named_decisions_entity1 = [label_names[label] for label in decisions_entity1] 83 | 84 | label_pairs_per_bot[bot_0][tuple(sorted(named_decisions_entity0))] += 1 85 | label_pairs_per_bot[bot_1][tuple(sorted(named_decisions_entity1))] += 1 86 | 87 | for label in named_decisions_entity0: 88 | agreement = 1 if set(named_decisions_entity0) == {label} else 0 89 | #agreement = named_decisions_entity0.count(label) / len(named_decisions_entity0) 90 | label_fractions_per_bot[bot_0][label].append(agreement) 91 | for label in named_decisions_entity1: 92 | agreement = 1 if set(named_decisions_entity1) == {label} else 0 93 | #agreement = named_decisions_entity1.count(label) / len(named_decisions_entity1) 94 | label_fractions_per_bot[bot_1][label].append(agreement) 95 | 96 | print('Label agreement per bot per turn:') 97 | decision_turns = sorted(decision_agreements_per_turn[list(decision_agreements_per_turn)[0]].keys()) 98 | print('bot\t' + '\t'.join(str(t) for t in decision_turns) + '\toverall') 99 | for bot in decision_agreements: 100 | table_row = [bot] 101 | turn_agreements = list() 102 | for turn in decision_turns: 103 | turn_agreements.append(decision_agreements_per_turn[bot][turn].count(1) / len(decision_agreements_per_turn[bot][turn])) 104 | table_row.append('\t'.join(str(round(a, 2)) for a in turn_agreements)) 105 | table_row.append(str(round(decision_agreements[bot].count(1) / len(decision_agreements[bot]), 2))) 106 | print('\t'.join(table_row)) 107 | 108 | print('Percentage of annotated label pairs') 109 | label_pairs_set = set() 110 | for bot, label_pairs in label_pairs_per_bot.items(): 111 | label_pairs_set.update(label_pairs.keys()) 112 | label_pairs_set = sorted(label_pairs_set) 113 | label_pairs_str = '\t'.join('-'.join(lp) for lp in label_pairs_set) 114 | print('Bot\t' + label_pairs_str) 115 | for bot in label_pairs_per_bot: 116 | table_row = [bot] 117 | annotation_count = sum(label_pairs_per_bot[bot].values()) 118 | for label_pair in label_pairs_set: 119 | table_row.append(str(round(label_pairs_per_bot[bot][label_pair] / annotation_count, 2))) 120 | print('\t'.join(table_row)) 121 | 122 | print('Agreement per label') 123 | label_set = sorted((label_names.values())) 124 | print('Bot\t' + '\t'.join(label_set)) 125 | for bot, label_fractions in label_fractions_per_bot.items(): 126 | table_row = [bot] 127 | for label in label_set: 128 | table_row.append(str(round(np.mean(label_fractions[label]), 2))) 129 | print('\t'.join(table_row)) 130 | 131 | 132 | def win_function_agreement(convos: List) -> None: 133 | 134 | agreement_ratios = defaultdict(list) 135 | 136 | for convo in convos: 137 | 138 | bot_0 = convo['system_type0'] 139 | bot_1 = convo['system_type1'] 140 | winners = defaultdict(list) 141 | 142 | for decision_turn, annotations in convo['annotations'].items(): 143 | 144 | if len(annotations) != 2 : 145 | print(convo['_id'], decision_turn, annotations[0]['package_id']) 146 | continue 147 | 148 | for annotation in annotations: 149 | e0_ann = annotation['entity0_annotation']['is_human'] 150 | e1_ann = annotation['entity1_annotation']['is_human'] 151 | # Determine winner 152 | if e0_ann == e1_ann: 153 | winner = None # Draw 154 | elif e0_ann is None: # e0 has been annotated as 'Undecided' 155 | winner = bot_1 if e1_ann is True else bot_0 # else mean e1_ann = False (annotated as bot) 156 | elif e0_ann is True: 157 
| winner = bot_0 # e1_ann is either undecided or bot 158 | elif e0_ann is False: 159 | winner = bot_1 # e0_ann is False and e1_ann is not 160 | else: 161 | raise NotImplementedError # Sanity check for forgotten conditions 162 | winners[decision_turn].append(winner) 163 | 164 | for decision_turn, turn_winners in winners.items(): 165 | agreement = 1 if len(set(turn_winners)) == 1 else 0 166 | agreement_ratios[tuple(sorted([bot_0, bot_1]))].append(agreement) 167 | 168 | print('Agreement on match outcome') 169 | for pair, ratios in agreement_ratios.items(): 170 | print(f'{pair}\t{np.mean(ratios)}\t{np.std(ratios)}\t{len(ratios)}') 171 | 172 | 173 | if __name__ == '__main__': 174 | import sys 175 | import json 176 | conversation_json_file = sys.argv[1] # e.g. sampled-dialogues-full-convai2.json in ../data_dump/MongoDump.zip 177 | convos = json.load(open(conversation_json_file)) 178 | 179 | label_agreement(convos) 180 | 181 | # Not used in the paper: 182 | # feature_agreement(convos) 183 | # win_function_agreement(convos) 184 | -------------------------------------------------------------------------------- /analysis/krippendorff.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Dict, Collection, Callable, Tuple, List, Set 2 | from collections import defaultdict, Counter 3 | import numpy as np 4 | import warnings 5 | 6 | def _canonical_form(labels: Collection) -> Tuple: 7 | """ 8 | transform label sets into a form that can be used as dictionary keys 9 | """ 10 | if len(labels) == 0: 11 | return 'label', 12 | else: 13 | return tuple(sorted(labels)) 14 | 15 | 16 | class KrippenDorff(object): 17 | """ 18 | computes Krippendorff's alpha 19 | """ 20 | 21 | def __init__( 22 | self, 23 | dist_fun: Callable[[Tuple, Tuple], float] 24 | ): 25 | """ 26 | 27 | :param dist_fun: distance function used in computation of alpha 28 | """ 29 | self.dist_fun = dist_fun 30 | 31 | def alpha(self, annotations: List[List[Tuple]]): 32 | """ 33 | :param annotations: for every item the list of annotations as tuples 34 | :return: a dictionary of the form 35 | { 36 | 'alpha': float, the krippendorff alpha 37 | 'disagreement_scores': List[float], for every item the value in the 38 | enumerator of the krippendorff calculation 39 | } 40 | """ 41 | values_by_units: List[Counter] = [ 42 | Counter(anns) 43 | for anns in annotations 44 | ] 45 | 46 | # number of annotations per segment 47 | n_u_dot: List[int] = _n_u_dot(values_by_units) 48 | # number of times a specific label_set has been observed 49 | # excluding times where it is the only annotation 50 | n_dot_u: Dict[Tuple, int] = _n_dot_u( 51 | values_by_units, 52 | n_u_dot, 53 | ) 54 | # total number of possible confusable label_sets 55 | n_dot_dot: int = sum(v for _, v in n_dot_u.items()) 56 | 57 | per_unit_disagreements: List[float] = self._per_unit_disagreement(values_by_units) 58 | observed_disagreement: float = sum(per_unit_disagreements) 59 | 60 | expected_disagreement = self._expected_disagreement(n_dot_u) 61 | if np.isclose(expected_disagreement, 0.0): 62 | warnings.warn( 63 | message=f"expected disagreement is (close to) 0, you might need to check your data or dist_fn", 64 | category=RuntimeWarning, 65 | ) 66 | alpha = 1.0 67 | else: 68 | alpha: float = 1 - (n_dot_dot - 1)*(observed_disagreement / expected_disagreement) 69 | 70 | return { 71 | 'alpha': alpha, 72 | 'disagreement_scores': per_unit_disagreements, 73 | } 74 | 75 | def _expected_disagreement(self, n_dot_u: Dict[Tuple, int]) -> float: 76 | 
result: float = 0.0 77 | all_tuples: List[Tuple] = list(n_dot_u.keys()) 78 | for i in range(len(all_tuples)): 79 | for j in range(i + 1, len(all_tuples)): 80 | tup1 = all_tuples[i] 81 | tup2 = all_tuples[j] 82 | 83 | result += n_dot_u[tup1]*n_dot_u[tup2]*self.dist_fun(tup1, tup2) 84 | return result 85 | 86 | def _per_unit_disagreement(self, values_per_units) -> List[float]: 87 | return [ 88 | self._single_unit_disagreement(counter) 89 | for counter in values_per_units 90 | ] 91 | 92 | def _single_unit_disagreement(self, counter: Counter) -> float: 93 | # number of annotations for this specific item 94 | n_u_dot: int = sum(c for _, c in counter.items()) 95 | # no disagreement if not enough annotations 96 | if n_u_dot < 2: 97 | return 0.0 98 | 99 | result: float = 0.0 100 | tups: List[Tuple] = list(counter.keys()) 101 | for i in range(len(tups)): 102 | for j in range(i + 1, len(tups)): 103 | tup1 = tups[i] 104 | tup2 = tups[j] 105 | result += counter[tup1]*counter[tup2]*self.dist_fun(tup1, tup2) 106 | 107 | return result / (n_u_dot - 1) 108 | 109 | 110 | def _n_u_dot(values_by_units: List[Counter]) -> List[int]: 111 | """ 112 | helper function computing number of annotations for a segment 113 | """ 114 | return [ 115 | sum(c for _, c in counter.items()) 116 | for counter in values_by_units 117 | ] 118 | 119 | 120 | def _n_dot_u(values_by_units: List[Counter], n_u_dot: List[int]) -> Dict[Tuple, int]: 121 | """ 122 | number of times a specific tuple is pairable in the data 123 | (pairable means there is at least 1 other annotation on the same segment) 124 | """ 125 | all_observed_sets = { 126 | tup 127 | for counter in values_by_units 128 | for tup, _ in counter.items() 129 | } 130 | 131 | return { 132 | tup: sum( 133 | counter[tup] 134 | for i, counter in enumerate(values_by_units) 135 | if n_u_dot[i] > 1 136 | ) 137 | for tup in all_observed_sets 138 | } 139 | 140 | 141 | def dist_fn(tup1, tup2): 142 | assert len(tup1) == 1 143 | assert len(tup2) == 1 144 | return 1.0 - float(tup1[0] == tup2[0]) 145 | 146 | 147 | if __name__ == '__main__': 148 | import sys 149 | sys.path.append('./') 150 | from templates.src.segment_analysis.annotator_scores import get_all_annotated_convos 151 | 152 | """ 153 | try: 154 | use_blacklist = sys.argv[1] == 'blacklist' 155 | except IndexError: 156 | use_blacklist = True 157 | 158 | if use_blacklist: 159 | blacklisted = set(create_black_list()) 160 | else: 161 | blacklisted = set() 162 | """ 163 | blacklisted = set() 164 | 165 | convo_id_to_annotators = {} 166 | convo_ids = [] 167 | annotations = [] 168 | systems = [] 169 | convos = get_all_annotated_convos() 170 | 171 | for convo_id, convo in convos.items(): 172 | decision_exchanges = convo['annotations'].keys() 173 | for decision_exchange in decision_exchanges: 174 | ent0_anns = [ 175 | ('human' if ann['entity0_annotation']['is_human'] else 'bot',) 176 | for ann in convo['annotations'][decision_exchange] 177 | if ann['user_name'] not in blacklisted 178 | ] 179 | ent1_anns = [ 180 | ('human' if ann['entity1_annotation']['is_human'] else 'bot',) 181 | for ann in convo['annotations'][decision_exchange] 182 | if ann['user_name'] not in blacklisted 183 | ] 184 | convo_id_to_annotators[convo_id] = [ 185 | ann['user_name'] 186 | for ann in convo['annotations'][decision_exchange] 187 | if ann['user_name'] not in blacklisted 188 | ] 189 | if len(ent0_anns) > 0: 190 | convo_ids.append(convo_id + '-ent0') 191 | annotations.append(ent0_anns) 192 | systems.append(convo['system_type0']) 193 | if len(ent1_anns) > 0: 194 | 
convo_ids.append(convo_id + '-ent1') 195 | annotations.append(ent1_anns) 196 | systems.append(convo['system_type1']) 197 | 198 | kripp = KrippenDorff(dist_fun=dist_fn) 199 | kripp_res = kripp.alpha(annotations=annotations) 200 | print(f"krippendorff alpha\toverall:\t{kripp_res['alpha']:.3f}") 201 | 202 | for system_name in sorted(set(systems)): 203 | k = KrippenDorff(dist_fun=dist_fn).alpha( 204 | annotations=[ 205 | ann 206 | for ann, name in zip(annotations, systems) 207 | if name == system_name 208 | ] 209 | ) 210 | print(f"krippendorff alpha\t{system_name}:\t{k['alpha']:.3f}") 211 | 212 | annotator_disagreements = {} 213 | for convo_id, score in zip(convo_ids, kripp_res['disagreement_scores']): 214 | annotators = convo_id_to_annotators[convo_id[:-5]] 215 | for a in annotators: 216 | if annotator_disagreements.get(a) is None: 217 | annotator_disagreements[a] = [] 218 | annotator_disagreements[a].append(score) 219 | 220 | average_disagreement_per_annotator = { 221 | a: np.mean(disagreements) 222 | for a, disagreements in annotator_disagreements.items() 223 | } 224 | 225 | print("annotator\taverage disagreement per annotation") 226 | for item in sorted( 227 | average_disagreement_per_annotator.items(), 228 | key=lambda tup: tup[1], 229 | reverse=True): 230 | print("\t".join(map(str, item))) 231 | -------------------------------------------------------------------------------- /analysis/nonparametric_survival.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | 4 | from lifelines import KaplanMeierFitter 5 | from lifelines.statistics import pairwise_logrank_test 6 | 7 | 8 | def estimate_survival(df, plot=True, censoring='right', fig_path=None): 9 | 10 | if censoring not in {'right', 'left'}: 11 | raise ValueError(f"unknown fit type: {censoring}," 12 | f" use one of {{'left', 'right'}}") 13 | 14 | kmf = KaplanMeierFitter(alpha=1.0) # disable confidence interval 15 | 16 | if plot: 17 | fig = plt.figure(figsize=(20, 10)) 18 | ax = fig.add_subplot(111) 19 | 20 | medians = {} 21 | for system in sorted(df.domain_system.unique()): 22 | if censoring == 'right': 23 | kmf.fit( 24 | df.loc[df['domain_system'] == system].time, 25 | df.loc[df['domain_system'] == system].spotted, 26 | label=system, 27 | ) 28 | elif censoring == 'left': 29 | kmf.fit_left_censoring( 30 | df.loc[df['domain_system'] == system].time, 31 | ~df.loc[df['domain_system'] == system].spotted, 32 | label=system, 33 | ) 34 | else: 35 | raise ValueError(f"unknown fit type: {censoring}," 36 | f" use one of {{'left', 'right'}}") 37 | 38 | if plot: 39 | kmf.plot_survival_function(ax=ax) 40 | 41 | medians[system] = kmf.median_survival_time_ 42 | 43 | if plot: 44 | plt.ylim(0.0, 1.0) 45 | plt.xlabel("Turns") 46 | plt.ylabel("Survival Probability") 47 | plt.title("Estimated Survival Function of different systems") 48 | save_path = fig_path or "survival.png" 49 | print(f'saving plot of estimated survival functions to: {save_path}') 50 | plt.savefig(save_path) 51 | 52 | return medians 53 | 54 | 55 | def comparisons(df, print=True): 56 | res = pairwise_logrank_test( 57 | event_durations=df.time, 58 | event_observed=df.spotted, 59 | groups=df.domain_system, 60 | ) 61 | 62 | if print: 63 | res.print_summary() 64 | 65 | return res 66 | 67 | 68 | if __name__ == '__main__': 69 | import sys 70 | from templates.src.scoring_utils import create_black_list 71 | from analysis.extract_event_data import fetch_event_data 72 | 73 | try: 74 | use_blacklist = sys.argv[1] == 'blacklist' 
75 | except IndexError: 76 | use_blacklist = True 77 | 78 | try: 79 | censoring = sys.argv[2] 80 | except IndexError: 81 | censoring = 'right' 82 | 83 | print(f"Fitting {'with' if use_blacklist else 'without'} blacklist and {censoring} censoring.") 84 | 85 | if use_blacklist: 86 | blacklisted = set(create_black_list()) 87 | else: 88 | blacklisted = set() 89 | 90 | data = fetch_event_data() 91 | data = data[~data['user'].isin(blacklisted)] 92 | 93 | # data = data.loc[data['convo_type'] == 'human-bot'] 94 | 95 | for domain in data.domain.unique(): 96 | 97 | sub = data.loc[data['domain'] == domain] 98 | 99 | medians = estimate_survival( 100 | sub, 101 | plot=True, 102 | censoring=censoring, 103 | fig_path=f"survival_{domain}.png", 104 | ) 105 | print("System\tMedian Survival") 106 | for system in sorted(sub.domain_system.unique()): 107 | print(f"{system}\t{medians[system]}") 108 | 109 | comparisons(sub, print=True) 110 | 111 | print('*' * 80) 112 | -------------------------------------------------------------------------------- /analysis/survival.R: -------------------------------------------------------------------------------- 1 | library(survival) 2 | library(survminer) 3 | library(glrt) 4 | library(icenReg) 5 | 6 | 7 | data <- read.csv("event_data.csv"); 8 | 9 | surv_fit <- survfit(Surv(time_left, time_right, censor_type, type = 'interval') ~ strata(system), data = data); 10 | 11 | all_systems = unique(data$system); 12 | 13 | strata_names <- c() 14 | for (name in names(surv_fit$strata)) { 15 | strata_names <- c(strata_names, strsplit(name, "=")[[1]][2]) 16 | } 17 | fontsize <- 20 18 | legend_size <- 40 19 | ggsurvplot( 20 | surv_fit, 21 | data = data, 22 | font.x = fontsize, 23 | font.y = fontsize, 24 | font.legend = legend_size, 25 | font.tickslab = fontsize, 26 | legend.labs = strata_names, 27 | legend.title = "Systems", 28 | size=4, 29 | ); 30 | 31 | significance_cutoff = 0.05; 32 | n_bonferroni = (length(all_systems) * (length(all_systems) - 1)) / 2; # number of compared pairs 33 | 34 | for (i in seq(1, length(all_systems) - 1)) { 35 | for (j in seq(i + 1, length(all_systems))) { 36 | sys1 = all_systems[i]; 37 | sys2 = all_systems[j]; 38 | sub_frame = subset(data, system == sys1 | system == sys2); 39 | 40 | mat <- matrix(data = 0, nrow = length(sub_frame$X), ncol = 3); 41 | mat[,1] = sub_frame$time_left; 42 | mat[,2] = sub_frame$time_right; 43 | mat[,3] = sub_frame$system == sys1; 44 | 45 | test <- gLRT(mat, k = 2); 46 | 47 | if (test$p < (significance_cutoff / n_bonferroni)) { 48 | significant_after_correction <- 'yes' 49 | } else { 50 | significant_after_correction <- 'no' 51 | } 52 | 53 | cat(sys1, sys2, test$p, significant_after_correction,'\n') 54 | } 55 | } 56 | 57 | 58 | for (i in seq(1, length(all_systems))) { 59 | cat(all_systems[i], '\n') 60 | show(ic_sp(Surv(time_left, time_right, censor_type, type = 'interval') ~ fluent + specific + sensible, data = subset(data, system == all_systems[i]), bs_samples = 200)) 61 | cat('\n') 62 | } 63 | -------------------------------------------------------------------------------- /configurations.py: -------------------------------------------------------------------------------- 1 | class BaseCongig(object): 2 | ''' 3 | Base config class 4 | ''' 5 | DEBUG = True 6 | TESTING = False 7 | class ProductionConfig(BaseCongig): 8 | """ 9 | Production specific config 10 | """ 11 | DEBUG = False 12 | class DevelopmentConfig(BaseCongig): 13 | """ 14 | Development environment specific configuration 15 | """ 16 | DEBUG = True 17 | TESTING = True 18 | 
-------------------------------------------------------------------------------- /data_dump/MongoDump.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/data_dump/MongoDump.zip -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: parlai_37 2 | channels: 3 | - pytorch 4 | - anaconda 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - absl-py=0.9.0=py37hc8dfbb8_1 9 | - argon2-cffi=20.1.0=py37he774522_1 10 | - attrs=19.3.0=py_0 11 | - backcall=0.2.0=py_0 12 | - blas=1.0=mkl 13 | - bleach=3.1.5=py_0 14 | - blinker=1.4=py_1 15 | - brotlipy=0.7.0=py37h4ab8f01_1000 16 | - ca-certificates=2020.7.22=0 17 | - cachetools=4.1.1=py_0 18 | - catalogue=1.0.0=py_0 19 | - certifi=2020.6.20=py37_0 20 | - cffi=1.14.0=py37ha419a9e_0 21 | - chardet=3.0.4=py37hc8dfbb8_1006 22 | - click=7.1.2=py_0 23 | - colorama=0.4.3=py_0 24 | - cryptography=2.9.2=py37h26f1ce3_0 25 | - cudatoolkit=10.1.243=h74a9793_0 26 | - cycler=0.10.0=py37_0 27 | - cymem=2.0.3=py37h1834ac0_2 28 | - cython-blis=0.4.1=py37h8055547_1 29 | - decorator=4.4.2=py_0 30 | - defusedxml=0.6.0=py_0 31 | - entrypoints=0.3=py37_0 32 | - freetype=2.10.2=hd328e21_0 33 | - google-auth=1.17.2=py_0 34 | - google-auth-oauthlib=0.4.1=py_2 35 | - grpcio=1.27.2=py37h554427f_0 36 | - icc_rt=2019.0.0=h0cc432a_1 37 | - icu=58.2=ha925a31_3 38 | - idna=2.10=pyh9f0ad1d_0 39 | - importlib-metadata=1.7.0=py37hc8dfbb8_0 40 | - importlib_metadata=1.7.0=0 41 | - intel-openmp=2020.1=216 42 | - ipykernel=5.3.4=py37h5ca1d4c_0 43 | - ipython=7.18.1=py37h5ca1d4c_0 44 | - ipython_genutils=0.2.0=py37_0 45 | - itsdangerous=1.1.0=py37_0 46 | - jedi=0.17.2=py37_0 47 | - jinja2=2.11.2=py_0 48 | - jpeg=9b=hb83a4c4_2 49 | - jsonschema=3.2.0=py37hc8dfbb8_1 50 | - jupyter_client=6.1.6=py_0 51 | - jupyter_core=4.6.3=py37_0 52 | - kiwisolver=1.2.0=py37h74a9793_0 53 | - libpng=1.6.37=h2a8f88b_0 54 | - libprotobuf=3.12.3=h7bd577a_0 55 | - libsodium=1.0.18=h62dcd97_0 56 | - libtiff=4.1.0=h56a325e_1 57 | - lz4-c=1.9.2=h62dcd97_1 58 | - m2w64-gcc-libgfortran=5.3.0=6 59 | - m2w64-gcc-libs=5.3.0=7 60 | - m2w64-gcc-libs-core=5.3.0=7 61 | - m2w64-gmp=6.1.0=2 62 | - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 63 | - markdown=3.2.2=py_0 64 | - markupsafe=1.1.1=py37he774522_0 65 | - matplotlib=3.2.2=0 66 | - matplotlib-base=3.2.2=py37h64f37c6_0 67 | - mistune=0.8.4=py37hfa6e2cd_1001 68 | - mkl=2019.4=245 69 | - mkl-service=2.3.0=py37hb782905_0 70 | - mkl_fft=1.1.0=py37h45dec08_0 71 | - mkl_random=1.1.0=py37h675688f_0 72 | - msys2-conda-epoch=20160418=1 73 | - murmurhash=1.0.0=py37h63f7a3c_0 74 | - nb_conda=2.2.1=py37_0 75 | - nb_conda_kernels=2.2.4=py37_0 76 | - nbconvert=5.6.1=py37_1 77 | - nbformat=5.0.7=py_0 78 | - notebook=6.1.1=py37_0 79 | - numpy=1.18.5=py37h6530119_0 80 | - numpy-base=1.18.5=py37hc3f5095_0 81 | - oauthlib=3.0.1=py_0 82 | - olefile=0.46=py37_0 83 | - openssl=1.1.1g=he774522_1 84 | - packaging=20.4=py_0 85 | - pandas=1.0.5=py37h47e9c7a_0 86 | - pandoc=2.10.1=0 87 | - pandocfilters=1.4.2=py37_1 88 | - parso=0.7.0=py_0 89 | - pickleshare=0.7.5=py37_1001 90 | - pillow=7.2.0=py37hcc1f983_0 91 | - pip=20.1.1=py37_1 92 | - plac=0.9.6=py37_0 93 | - preshed=3.0.2=py37h1834ac0_3 94 | - prometheus_client=0.8.0=py_0 95 | - prompt-toolkit=3.0.7=py_0 96 | - pyasn1=0.4.8=py_0 97 | - pyasn1-modules=0.2.7=py_0 98 | - 
pycparser=2.20=pyh9f0ad1d_2 99 | - pygments=2.6.1=py_0 100 | - pyjwt=1.7.1=py_0 101 | - pymongo=3.10.1=py37ha925a31_0 102 | - pyopenssl=19.1.0=py_1 103 | - pyparsing=2.4.7=py_0 104 | - pyqt=5.9.2=py37h6538335_2 105 | - pyrsistent=0.16.0=py37h8055547_0 106 | - pysocks=1.7.1=py37hc8dfbb8_1 107 | - python=3.7.7=h81c818b_4 108 | - python_abi=3.7=1_cp37m 109 | - pytorch=1.6.0=py3.7_cuda101_cudnn7_0 110 | - pytz=2020.1=py_0 111 | - pywin32=227=py37he774522_1 112 | - pywinpty=0.5.7=py37_0 113 | - pyzmq=19.0.1=py37ha925a31_1 114 | - qt=5.9.7=vc14h73c81de_0 115 | - requests=2.24.0=pyh9f0ad1d_0 116 | - requests-oauthlib=1.2.0=py_0 117 | - rsa=4.6=pyh9f0ad1d_0 118 | - seaborn=0.10.1=py_0 119 | - send2trash=1.5.0=py37_0 120 | - setuptools=47.3.1=py37_0 121 | - sip=4.19.8=py37h6538335_0 122 | - six=1.15.0=py_0 123 | - spacy=2.3.0=py37h1fb7aa8_0 124 | - sqlite=3.32.3=h2a8f88b_0 125 | - srsly=1.0.2=py37h1834ac0_0 126 | - tensorboard=2.2.2=py_0 127 | - tensorboard-plugin-wit=1.6.0=pyh9f0ad1d_0 128 | - terminado=0.8.3=py37_0 129 | - testpath=0.4.4=py_0 130 | - thinc=7.4.1=py37h1fb7aa8_0 131 | - tk=8.6.10=he774522_0 132 | - torchvision=0.7.0=py37_cu101 133 | - tornado=6.0.4=py37he774522_1 134 | - tqdm=4.47.0=py_0 135 | - traitlets=4.3.3=py37_0 136 | - urllib3=1.25.9=py_0 137 | - vc=14.1=h0510ff6_4 138 | - vs2015_runtime=14.16.27012=hf0eaf9b_2 139 | - wasabi=0.7.0=pyh9f0ad1d_0 140 | - wcwidth=0.2.5=py_0 141 | - webencodings=0.5.1=py37_1 142 | - werkzeug=1.0.1=py_0 143 | - wheel=0.34.2=py37_0 144 | - win_inet_pton=1.1.0=py37_0 145 | - wincertstore=0.2=py37_0 146 | - winpty=0.4.3=4 147 | - xz=5.2.5=h62dcd97_0 148 | - zeromq=4.3.2=ha925a31_2 149 | - zipp=3.1.0=py_0 150 | - zlib=1.2.11=h62dcd97_4 151 | - zstd=1.4.5=h04227a9_0 152 | - pip: 153 | - alembic==1.4.2 154 | - antlr4-python3-runtime==4.8 155 | - anykeystore==0.2 156 | - apted==1.0.3 157 | - asgiref==3.2.10 158 | - astunparse==1.6.3 159 | - bcrypt==3.1.7 160 | - bz2file==0.98 161 | - configparser==5.0.0 162 | - coverage==4.5.1 163 | - cryptacular==1.5.5 164 | - cuttlepool==0.9.1 165 | - cython==0.29.21 166 | - dataclasses==0.6 167 | - de-core-news-sm==2.3.0 168 | - django==3.0.8 169 | - docker-pycreds==0.4.0 170 | - editdistance==0.5.3 171 | - en-core-web-sm==2.3.0 172 | - expert-api==0.1.0 173 | - factory-boy==2.9.0 174 | - faker==1.0.7 175 | - filelock==3.0.12 176 | - flake8==3.5.0 177 | - flask==1.0.2 178 | - flask-bcrypt==0.7.1 179 | - flask-cors==3.0.7 180 | - flask-migrate==2.2.0 181 | - flask-sqlalchemy==2.3.2 182 | - flask-testing==0.7.1 183 | - flask-wtf==0.14.2 184 | - func-timeout==4.3.5 185 | - future==0.18.2 186 | - gast==0.3.3 187 | - gitdb==4.0.5 188 | - gitpython==3.1.3 189 | - google-pasta==0.2.0 190 | - gql==0.2.0 191 | - graphql-core==1.1 192 | - h5py==2.10.0 193 | - hupper==1.10.2 194 | - hydra-core==1.0.2 195 | - image==1.5.32 196 | - importlib-resources==3.0.0 197 | - joblib==0.15.1 198 | - keras==2.4.3 199 | - keras-preprocessing==1.1.2 200 | - mako==1.1.3 201 | - mccabe==0.6.1 202 | - meld3==2.0.1 203 | - mysql-connector-repackaged==0.3.1 204 | - ninja==1.10.0.post2 205 | - nltk==3.5 206 | - nvidia-ml-py3==7.352.0 207 | - omegaconf==2.0.2 208 | - opt-einsum==3.2.1 209 | - pastedeploy==2.1.0 210 | - pathtools==0.1.2 211 | - pbkdf2==1.3 212 | - plaster==1.0 213 | - plaster-pastedeploy==0.7 214 | - portalocker==2.0.0 215 | - promise==2.3 216 | - protobuf==3.12.2 217 | - psutil==5.7.0 218 | - pycodestyle==2.3.1 219 | - pyflakes==1.6.0 220 | - pyramid==1.10.4 221 | - pyramid-mailer==0.15.1 222 | - python-dateutil==2.8.0 223 | - 
python-editor==1.0.4 224 | - python-graphviz==0.14.1 225 | - python3-openid==3.2.0 226 | - pyyaml==5.3.1 227 | - regex==2020.6.8 228 | - repoze-sendmail==4.4.1 229 | - sacrebleu==1.4.14 230 | - sacremoses==0.0.43 231 | - scikit-learn==0.23.1 232 | - scipy==1.4.1 233 | - semql==0.1.0 234 | - sentencepiece==0.1.91 235 | - sentry-sdk==0.15.1 236 | - seqeval==0.0.12 237 | - shortuuid==1.0.1 238 | - simpletransformers==0.40.2 239 | - smmap==3.0.4 240 | - sqlalchemy==1.3.18 241 | - sqlparse==0.3.1 242 | - subprocess32==3.5.4 243 | - supervisor==4.0.2 244 | - tensorboardx==2.0 245 | - tensorflow==2.2.0 246 | - tensorflow-estimator==2.2.0 247 | - termcolor==1.1.0 248 | - text-unidecode==1.2 249 | - threadpoolctl==2.1.0 250 | - timeout-decorator==0.4.1 251 | - tokenizers==0.8.0 252 | - transaction==3.0.0 253 | - transformers==3.0.0 254 | - translationstring==1.4 255 | - trueskill==0.4.5 256 | - typing-extensions==3.7.4.3 257 | - velruse==1.1.1 258 | - venusian==3.0.0 259 | - wandb==0.9.2 260 | - watchdog==0.10.3 261 | - webob==1.8.6 262 | - wget==3.2 263 | - whoosh==2.7.4 264 | - wikiextractor==0.1 265 | - wrapt==1.12.1 266 | - wtforms==2.3.1 267 | - wtforms-recaptcha==0.3.2 268 | - zope-deprecation==4.4.0 269 | - zope-interface==5.1.0 270 | - zope-sqlalchemy==1.3 271 | prefix: C:\Users\Jan\anaconda3\envs\parlai_37 272 | 273 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from templates import app 2 | from templates.src.utils import load_config 3 | config = load_config() 4 | #Load this config object for development mode 5 | app.config.from_object('configurations.DevelopmentConfig') 6 | app.run(host='0.0.0.0', port=config['local_port']) 7 | -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | # A Template for Creating a Full Stack Web Application with Flask, NPM, Webpack and Reactjs 2 | 3 | Note: this ReadMe is copied from: https://itnext.io/a-template-for-creating-a-full-stack-web-application-with-flask-npm-webpack-and-reactjs-be2294b111bd 4 | 5 | ## About 6 | This is a **Hello React!** web app template written in Python-Flask for the back end and React-JS for the front end. You can use this starter files as a template for your own web app projects. 7 | 8 | ## How I Did It! 9 | You can find a walkthrough of how I build this application from scratch on [my blog](https://medium.com/@tonyparkerkenz/a-template-for-creating-a-full-stack-web-application-with-flask-npm-webpack-and-reactjs-be2294b111bd) 10 | 11 | ## Instructions 12 | Below are the installing and running procedues 13 | ### Installing 14 | 1. make sure you have python, npm, and pip installed on your machine. 15 | For this project, I used : **npm v4.6.1**, **pip v18.0**, **python v3.6.2** 16 | 2. Enter in to the directary *hello_template/templates/static/* and run the command `npm install`. This will download and install all the dependencies listed in *package.json*. 17 | 3. In the static directory, start the npm watcher to build the front end code with the command `npm run watch` 18 | 4. Create a python virtualenv(Optional) 19 | 5. Install flask with the command `$ pip install flask` 20 | 6. Install Reactjs with the command `$ npm i react react-dom --save-dev` 21 | ### Running 22 | 1. Go to the root directory and start the server with `python run.py` 23 | 2. 
If all is working correctly, you will be given an address http://127.0.0.1:5000/ which you can open in your favorite browser and see our application running and displaying “Hello React!” 24 | 25 | 26 | -------------------------------------------------------------------------------- /templates/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import templates.src.api.dialogue_system_names as dialogue_system_names 3 | import templates.src.api.dialogue as dialogue 4 | import templates.src.api.leaderboard as leaderboard 5 | 6 | app = Flask(__name__, 7 | static_folder = './public', 8 | template_folder="./static") 9 | 10 | from templates.src.views import hello_blueprint 11 | # register the blueprints 12 | app.register_blueprint(hello_blueprint) 13 | 14 | API_MODULES = [dialogue_system_names, dialogue, leaderboard] 15 | 16 | 17 | def register_route(url_path, name, fn, methods=['GET']): 18 | """ 19 | Registers the given `fn` function as the handler, when Flask receives a 20 | request to `url_path`. 21 | """ 22 | app.add_url_rule(url_path, name, fn, methods=methods) 23 | 24 | 25 | # Register all modules stored in API_MODULES 26 | for api_module in API_MODULES: 27 | for r in api_module.routes: 28 | register_route(r['url'], r['name'], r['fn'], r['methods']) 29 | -------------------------------------------------------------------------------- /templates/public/css/main.css: -------------------------------------------------------------------------------- 1 | html { 2 | box-sizing: border-box; 3 | -ms-text-size-adjust: 100%; 4 | -webkit-text-size-adjust: 100%; 5 | } 6 | 7 | *, *:before, *:after { 8 | box-sizing: inherit; 9 | } 10 | 11 | body { 12 | margin: 0; 13 | font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; 14 | } 15 | 16 | h1, h2, h3, h4, h5, h6, p, ul { 17 | margin: 0; 18 | padding: 0; 19 | } 20 | 21 | 22 | #content { 23 | max-width: 1170px; 24 | margin: 0 auto; 25 | } 26 | 27 | .bot0 { 28 | background: rgba(231, 231, 231, 0.15) none repeat scroll 0 0; 29 | float: left; 30 | clear: left; 31 | vertical-align: top; 32 | border-top: darkgrey dotted; 33 | display: inline-block; 34 | } 35 | 36 | .bot1 { 37 | background: rgba(231, 231, 231, 0.15) none repeat scroll 0 0; 38 | float: left; 39 | clear: left; 40 | vertical-align: top; 41 | border-top: darkgrey dotted; 42 | display: inline-block; 43 | } 44 | 45 | .bot_img0 { 46 | height: 50px; 47 | } 48 | 49 | .bot_img1 { 50 | height: 50px; 51 | } 52 | 53 | .bot_img_div0 { 54 | /*display: inline-block; */ 55 | float: left; 56 | padding: 1px; 57 | margin-right: 10px; 58 | } 59 | 60 | .bot_img_div1 { 61 | float: right; 62 | padding: 1px; 63 | margin-left: 10px; 64 | } 65 | 66 | .bot0 p { 67 | border-radius: 3px; 68 | font-size: 14px; 69 | margin: 0; 70 | color: black; 71 | padding: 1px 10px 5px 12px; 72 | 73 | } 74 | 75 | .bot1 p { 76 | border-radius: 3px; 77 | color: black; 78 | font-size: 14px; 79 | margin: 0; 80 | padding: 1px 10px 5px 12px; 81 | width: 100%; 82 | } 83 | 84 | .speaker-id0 { 85 | font-weight: bold; 86 | } 87 | 88 | .speaker-id1 { 89 | font-weight: bold; 90 | } 91 | 92 | #dialogue-content { 93 | margin: 5px; 94 | padding-top: 10px; 95 | overflow: hidden; 96 | } 97 | 98 | .next_turn { 99 | display: inline-block; 100 | float: left; 101 | } 102 | 103 | .bot_inactive_turn { 104 | display: none; 105 | } 106 | 107 | .is_not_last_turn { 108 | display: none; 109 | } 110 | 111 | .is_not_dialogue_end { 112 | display: none; 113 | } 114 | 115 | .submission_div { 116 
| display: none; 117 | } 118 | 119 | #instruction-text-hidden { 120 | display: none; 121 | } 122 | 123 | 124 | #root_black { 125 | background: rgba(0,0,0,0.67); 126 | } 127 | 128 | .root_green { 129 | background: rgba(13,142,7,0.67); 130 | } 131 | 132 | .root_yellow_green { 133 | background: rgba(154, 205, 50, 0.91); 134 | } 135 | 136 | .root_yellow { 137 | background: rgba(207,209,0,0.99); 138 | } 139 | 140 | .root_red { 141 | background: rgba(209,6,0,0.99); 142 | } 143 | 144 | .root_darkred { 145 | background: rgba(125,6,0,0.99); 146 | } 147 | 148 | .red_score{ 149 | color: red; 150 | } 151 | 152 | .green_score{ 153 | color: lawngreen; 154 | } 155 | 156 | .row_BASELINE{ 157 | background: green; 158 | } 159 | 160 | .row_MINIMAL-BASELINE{ 161 | background: red; 162 | } -------------------------------------------------------------------------------- /templates/public/css/radio-button.css: -------------------------------------------------------------------------------- 1 | .funkyradio div { 2 | clear: both; 3 | overflow: hidden; 4 | } 5 | 6 | .funkyradio label { 7 | width: 100%; 8 | border-radius: 3px; 9 | border: 1px solid #D1D3D4; 10 | font-weight: normal; 11 | } 12 | 13 | .funkyradio input[type="radio"]:empty, 14 | .funkyradio input[type="checkbox"]:empty { 15 | display: none; 16 | } 17 | 18 | .funkyradio input[type="radio"]:empty ~ label, 19 | .funkyradio input[type="checkbox"]:empty ~ label { 20 | position: relative; 21 | line-height: 2.5em; 22 | text-indent: 3.25em; 23 | margin-top: 0; 24 | cursor: pointer; 25 | -webkit-user-select: none; 26 | -moz-user-select: none; 27 | -ms-user-select: none; 28 | user-select: none; 29 | } 30 | 31 | .funkyradio input[type="radio"]:empty ~ label:before, 32 | .funkyradio input[type="checkbox"]:empty ~ label:before { 33 | position: absolute; 34 | display: block; 35 | top: 0; 36 | bottom: 0; 37 | left: 0; 38 | content: ''; 39 | width: 2.5em; 40 | background: #D1D3D4; 41 | border-radius: 3px 0 0 3px; 42 | } 43 | 44 | .funkyradio input[type="radio"]:hover:not(:checked) ~ label, 45 | .funkyradio input[type="checkbox"]:hover:not(:checked) ~ label { 46 | color: #888; 47 | } 48 | 49 | .funkyradio input[type="radio"]:hover:not(:checked) ~ label:before, 50 | .funkyradio input[type="checkbox"]:hover:not(:checked) ~ label:before { 51 | content: '\2714'; 52 | text-indent: .9em; 53 | color: #C2C2C2; 54 | } 55 | 56 | .funkyradio input[type="radio"]:checked ~ label, 57 | .funkyradio input[type="checkbox"]:checked ~ label { 58 | color: #777; 59 | } 60 | 61 | .funkyradio input[type="radio"]:checked ~ label:before, 62 | .funkyradio input[type="checkbox"]:checked ~ label:before { 63 | content: '\2714'; 64 | text-indent: .9em; 65 | color: #333; 66 | background-color: #ccc; 67 | } 68 | 69 | .funkyradio input[type="radio"]:focus ~ label:before, 70 | .funkyradio input[type="checkbox"]:focus ~ label:before { 71 | box-shadow: 0 0 0 3px #999; 72 | } 73 | 74 | .funkyradio-default input[type="radio"]:checked ~ label:before, 75 | .funkyradio-default input[type="checkbox"]:checked ~ label:before { 76 | color: #333; 77 | background-color: #ccc; 78 | } 79 | 80 | .funkyradio-primary input[type="radio"]:checked ~ label:before, 81 | .funkyradio-primary input[type="checkbox"]:checked ~ label:before { 82 | color: #fff; 83 | background-color: #337ab7; 84 | } 85 | 86 | .funkyradio-success input[type="radio"]:checked ~ label:before, 87 | .funkyradio-success input[type="checkbox"]:checked ~ label:before { 88 | color: #fff; 89 | background-color: #5cb85c; 90 | } 91 | 92 | .funkyradio-danger 
input[type="radio"]:checked ~ label:before, 93 | .funkyradio-danger input[type="checkbox"]:checked ~ label:before { 94 | color: #fff; 95 | background-color: #d9534f; 96 | } 97 | 98 | .funkyradio-warning input[type="radio"]:checked ~ label:before, 99 | .funkyradio-warning input[type="checkbox"]:checked ~ label:before { 100 | color: #fff; 101 | background-color: #f0ad4e; 102 | } 103 | 104 | .funkyradio-info input[type="radio"]:checked ~ label:before, 105 | .funkyradio-info input[type="checkbox"]:checked ~ label:before { 106 | color: #fff; 107 | background-color: #5bc0de; 108 | } 109 | 110 | .slidecontainer { 111 | width: 100%; 112 | } 113 | 114 | .slider { 115 | -webkit-appearance: none; 116 | width: 100%; 117 | height: 15px; 118 | border-radius: 5px; 119 | background: #d3d3d3; 120 | outline: none; 121 | opacity: 0.7; 122 | -webkit-transition: .2s; 123 | transition: opacity .2s; 124 | } 125 | 126 | .slider:hover { 127 | opacity: 1; 128 | } 129 | 130 | .slider::-webkit-slider-thumb { 131 | -webkit-appearance: none; 132 | appearance: none; 133 | width: 25px; 134 | height: 25px; 135 | border-radius: 50%; 136 | background: #4CAF50; 137 | cursor: pointer; 138 | } 139 | 140 | .slider::-moz-range-thumb { 141 | width: 25px; 142 | height: 25px; 143 | border-radius: 50%; 144 | background: #4CAF50; 145 | cursor: pointer; 146 | } -------------------------------------------------------------------------------- /templates/public/img/bot_blue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/public/img/bot_blue.jpg -------------------------------------------------------------------------------- /templates/public/img/bot_green.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/public/img/bot_green.jpg -------------------------------------------------------------------------------- /templates/public/img/bot_red.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/public/img/bot_red.jpg -------------------------------------------------------------------------------- /templates/public/img/bot_yellow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/public/img/bot_yellow.jpg -------------------------------------------------------------------------------- /templates/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/src/__init__.py -------------------------------------------------------------------------------- /templates/src/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/src/api/__init__.py -------------------------------------------------------------------------------- /templates/src/api/dialogue.py: -------------------------------------------------------------------------------- 1 | from templates.src.mongo_client import 
sampled_collection, labelled_collection, package_collection 2 | from flask import jsonify, request 3 | import random 4 | from bson import ObjectId 5 | import time 6 | from templates.src.utils import load_config 7 | 8 | config = load_config() 9 | 10 | 11 | def get_random_dialogue(): 12 | """ 13 | GET /api/leaderboard/list 14 | Returns a list of leaderboard entries. 15 | """ 16 | dialogue_system = request.args.get('dialogue_system', None) 17 | dialogue_domains = request.args.get('dialogue_domains', None) 18 | 19 | if dialogue_system is not None and dialogue_domains is not None and not dialogue_system == 'Select Item': 20 | results = list(sampled_collection.find({'system_name': dialogue_system, 'domain_name': dialogue_domains})) 21 | elif dialogue_system is not None and not dialogue_system == 'Select Item': 22 | results = list(sampled_collection.find({'system_name': dialogue_system})) 23 | elif dialogue_domains is not None and not dialogue_domains == 'All Domains': 24 | results = list(sampled_collection.find({'domain_name': dialogue_domains})) 25 | else: 26 | results = list(sampled_collection.find({})) 27 | 28 | dialogue = random.choice(results) 29 | dialogue['_id'] = str(dialogue['_id']) 30 | return jsonify(dialogue) 31 | 32 | 33 | def get_list_of_dialogues(): 34 | dialogue_system = request.args.get('dialogue_system', None) 35 | domain = request.args.get('domain', None) 36 | dummy_value = 'All Domains' 37 | if dialogue_system is not None and domain is not None and not dialogue_system == dummy_value: 38 | results = list(sampled_collection.find({'system_name': dialogue_system, 'domain_name': domain})) 39 | elif dialogue_system is not None and not dialogue_system == dummy_value: 40 | results = list(sampled_collection.find({'system_name': dialogue_system})) 41 | elif domain is not None and not domain == dummy_value: 42 | results = list(sampled_collection.find({'domain_name': domain})) 43 | else: 44 | results = list(sampled_collection.find({})) 45 | 46 | result_ids = [str(result['_id']) for result in results] 47 | return jsonify(result_ids) 48 | 49 | 50 | def get_dialogue_for_id(): 51 | """ 52 | GET /api/leaderboard/list 53 | Returns a list of leaderboard entries. 
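    Note: the route above does not match this handler; it is registered at GET /api/get_dialogue_for_id (see `routes` below) and returns the sampled dialogue for the given `dialogue_id`, with its `_id` stringified and a millisecond `start_time` added.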
54 | """ 55 | dialogue_id = request.args.get('dialogue_id', None) 56 | if dialogue_id == 'Select Item': 57 | return jsonify(None) 58 | dialogue = sampled_collection.find_one({'_id': ObjectId(dialogue_id)}) 59 | if dialogue is None: 60 | return jsonify({}), 500 61 | now = int(round(time.time() * 1000)) 62 | 63 | dialogue['_id'] = str(dialogue['_id']) 64 | dialogue['start_time'] = now 65 | return jsonify(dialogue) 66 | 67 | 68 | def get_package_for_id(): 69 | package_id = request.args.get('package_id', None) 70 | package = package_collection.find_one({'_id': ObjectId(package_id)}) 71 | return jsonify({'pkg_list': package['package']}) 72 | 73 | 74 | def post_dialogue(): 75 | if request.method == 'POST': 76 | data = request.get_json(force=True) 77 | now = int(round(time.time() * 1000)) 78 | data['end_time'] = now 79 | data['elapsed_time'] = now - data['start_time'] 80 | print(data) 81 | labelled_collection.insert_one(data) 82 | return jsonify({'msg': 'OK'}), 200 83 | else: 84 | return 'OK', 201 85 | 86 | 87 | def get_number_of_packages_for_user(): 88 | user_name = request.args.get('user_name', None) 89 | pipeline = [ 90 | { 91 | "$group": { 92 | "_id": "$user_name", 93 | "packages": {"$addToSet": "$package_id"} 94 | } 95 | } 96 | ] 97 | result = labelled_collection.aggregate(pipeline) 98 | n_packages = [len(res['packages']) for res in result if res['_id'] == user_name] 99 | if n_packages == []: 100 | n_packages = 0 101 | else: 102 | n_packages = n_packages[0] 103 | 104 | max_package_per_user = config['max_package_per_user'] 105 | return jsonify({'package_for_user': n_packages, 'max_package_per_user': max_package_per_user}) 106 | 107 | 108 | 109 | routes = [{'url': '/api/random_dialogue', 110 | 'name': 'random_dialogue', 111 | 'fn': get_random_dialogue, 112 | 'methods': ['GET']}, 113 | 114 | {'url': '/api/list_of_dialogues', 115 | 'name': 'list_of_dialogues', 116 | 'fn': get_list_of_dialogues, 117 | 'methods': ['GET']}, 118 | 119 | {'url': '/api/get_dialogue_for_id', 120 | 'name': 'get_dialogue_for_id', 121 | 'fn': get_dialogue_for_id, 122 | 'methods': ['GET']}, 123 | 124 | {'url': '/api/post_decision', 125 | 'name': 'post_decision', 126 | 'fn': post_dialogue, 127 | 'methods': ['POST']}, 128 | 129 | {'url': '/api/get_package_for_id', 130 | 'name': 'get_package_for_id', 131 | 'fn': get_package_for_id, 132 | 'methods': ['GET']}, 133 | 134 | {'url': '/api/get_number_of_packages_for_user', 135 | 'name': 'get_number_of_packages_for_user', 136 | 'fn': get_number_of_packages_for_user, 137 | 'methods': ['GET']} 138 | ] 139 | -------------------------------------------------------------------------------- /templates/src/api/dialogue_system_names.py: -------------------------------------------------------------------------------- 1 | from templates.src.mongo_client import sampled_collection 2 | from flask import jsonify 3 | 4 | 5 | def get_dialogue_systems(): 6 | """ 7 | GET /api/leaderboard/list 8 | Returns a list of leaderboard entries. 
9 | """ 10 | results = sampled_collection.aggregate(pipeline=[ 11 | { 12 | "$group": { 13 | "_id": "$system_name", 14 | } 15 | } 16 | ]) 17 | 18 | dialogue_systems = sorted([{'id': res['_id'], 'name': res['_id']} for res in results], key=lambda x: x['name']) 19 | 20 | return jsonify(dialogue_systems) 21 | 22 | def get_dialogue_domains(): 23 | results = sampled_collection.aggregate(pipeline=[ 24 | { 25 | "$group": { 26 | "_id": "$domain_name", 27 | } 28 | } 29 | ]) 30 | 31 | dialogue_systems = sorted([{'id': res['_id'], 'name': res['_id']} for res in results], key=lambda x: x['name']) 32 | 33 | return jsonify(dialogue_systems) 34 | 35 | 36 | routes = [{'url': '/api/dialouge_systems', 37 | 'name': 'dialouge_systems', 38 | 'fn': get_dialogue_systems, 39 | 'methods': ['GET']}, 40 | {'url': '/api/dialouge_domains', 41 | 'name': 'dialouge_domains', 42 | 'fn': get_dialogue_domains, 43 | 'methods': ['GET']}] 44 | -------------------------------------------------------------------------------- /templates/src/api/leaderboard.py: -------------------------------------------------------------------------------- 1 | from templates.src.scoring_utils import get_leaderboard as retrieve_leaderboard, compute_scores_for_user 2 | from flask import jsonify, request 3 | 4 | 5 | def get_leaderboard(): 6 | return jsonify(retrieve_leaderboard()) 7 | 8 | def get_score_for_user(): 9 | user_name = request.args.get('user_name', None) 10 | final_score, avg_corr, avg_turn_penalty = compute_scores_for_user(user_name) 11 | ret_dict = { 12 | 'final_score': final_score, 13 | 'avg_corr': avg_corr, 14 | 'avg_turn_penalty': avg_turn_penalty 15 | } 16 | return jsonify(ret_dict) 17 | 18 | routes = [{'url': '/api/get_leaderboard', 19 | 'name': 'get_leaderboard', 20 | 'fn': get_leaderboard, 21 | 'methods': ['GET']}, 22 | {'url': '/api/get_score_for_user', 23 | 'name': 'get_score_for_user', 24 | 'fn': get_score_for_user, 25 | 'methods': ['GET']}, 26 | ] 27 | -------------------------------------------------------------------------------- /templates/src/mongo_client.py: -------------------------------------------------------------------------------- 1 | from pymongo import MongoClient 2 | from templates.src.utils import load_config 3 | 4 | config = load_config() 5 | 6 | DATABASE_NAME = config['database_name'] 7 | SAMPLED_COLLECTION_NAME = config['sampled_collection_name'] 8 | ANNOTATED_COLLECTION_NAME = config['labelled_collection_name'] 9 | PACKET_COLLECTION_NAME = config['package_collection_name'] 10 | 11 | SEMI_SUPERVISED_COLLECTION_NAME = config['unlabelled_data'] 12 | 13 | client = MongoClient( 14 | config['host'], 15 | config['port'], 16 | username=config['user'], 17 | password=config['password'], 18 | authSource=DATABASE_NAME 19 | ) 20 | 21 | db = client[DATABASE_NAME] 22 | sampled_collection = db[SAMPLED_COLLECTION_NAME] 23 | labelled_collection = db[ANNOTATED_COLLECTION_NAME] 24 | package_collection = db[PACKET_COLLECTION_NAME] 25 | semi_supervised_collection = db[SEMI_SUPERVISED_COLLECTION_NAME] 26 | -------------------------------------------------------------------------------- /templates/src/packaging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/src/packaging/__init__.py -------------------------------------------------------------------------------- /templates/src/packaging/dump_package_uri.py: 
-------------------------------------------------------------------------------- 1 | from templates.src.mongo_client import sampled_collection, PACKET_COLLECTION_NAME, package_collection 2 | from os.path import join 3 | import random 4 | from templates.src.packaging.naive_strategy import Naive 5 | 6 | data_path = 'data/uri_dumps' 7 | 8 | BASE_URL = 'http://160.85.252.225:5003/pkgsg?id=' 9 | 10 | ofile = open(join(data_path, '{}.csv'.format(PACKET_COLLECTION_NAME)), 'wt', encoding='utf-8') 11 | ofile.write('uri\n') 12 | results = sampled_collection.find({}) 13 | package_collection.remove({}) 14 | 15 | strategy = Naive(list(results), 20, segments=[5, 9, 13]) # 16 | #strategy = RatioStrategy(list(results), 20, 4, segments=[3, 5, 9]) 17 | 18 | uri_list = [] 19 | for package in strategy.create_chunks(): 20 | data_point = {} 21 | data_point['package'] = package 22 | inserted_id = package_collection.insert_one(data_point) 23 | pid = inserted_id.inserted_id 24 | url = BASE_URL + str(pid) 25 | uri_list.append(url) 26 | 27 | random.shuffle(uri_list) 28 | for url in uri_list: 29 | ofile.write(url + '\n') 30 | 31 | ofile.close() -------------------------------------------------------------------------------- /templates/src/packaging/naive_strategy.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class Naive: 4 | def __init__(self, convos, chunk_size, segments=[0]): 5 | self.convos = convos 6 | self.chunk_size = chunk_size 7 | self.segments = segments 8 | 9 | def chunks(self, l, n): 10 | """Yield successive n-sized chunks from l.""" 11 | for i in range(0, len(l), n): 12 | yield l[i:i + n] 13 | 14 | def chunking(self, _list, chunk_len): 15 | random.shuffle(_list) 16 | return [chunk for chunk in self.chunks(_list, chunk_len)] 17 | 18 | def create_chunks(self): 19 | convo_id_to_segments = {} 20 | for convo in self.convos: 21 | # we pop then the segment from the top -> we want to have a random order 22 | sgm = [x for x in self.segments] 23 | random.shuffle(sgm) 24 | convo_id_to_segments[str(convo['_id'])] = sgm 25 | 26 | for _ in range(len(self.segments)): 27 | convos_chunks = self.chunking(self.convos, self.chunk_size) 28 | random.shuffle(convos_chunks) 29 | for convos_chunk in convos_chunks: 30 | ret_chunk = [] 31 | for convo in convos_chunk: 32 | convo_id = str(convo['_id']) 33 | start_turn = convo_id_to_segments[convo_id].pop() 34 | if start_turn == -1: 35 | start_turn = len(convo['convo']) 36 | ret_chunk.append({'convo_id': convo_id, 'start_turn': start_turn}) 37 | random.shuffle(ret_chunk) 38 | yield ret_chunk 39 | 40 | -------------------------------------------------------------------------------- /templates/src/packaging/ratio_strategy.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import math 3 | import random 4 | 5 | class RatioStrategy: 6 | def __init__(self, convos, chunk_size, ratio, segments=[0]): 7 | self.convos = convos 8 | self.chunk_size = chunk_size 9 | self.ratio = ratio 10 | self.create_bins() 11 | self.segments = segments 12 | 13 | def chunks(self, l, n): 14 | """Yield successive n-sized chunks from l.""" 15 | for i in range(0, len(l), n): 16 | yield l[i:i + n] 17 | 18 | def create_bins(self): 19 | type_to_bin = defaultdict(lambda: []) 20 | # create_random_packages 21 | cid_list = [] 22 | for result in self.convos: 23 | cid = str(result['_id']) 24 | cid_list.append(cid) 25 | is_human0 = result['is_human0'] 26 | is_human1 = result['is_human1'] 
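            # Bin each conversation by participant type: 'hh' = human-human, 'bb' = bot-bot, 'hb' = mixed (one human, one bot).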
27 | 28 | if is_human0 and is_human1: 29 | type_to_bin['hh'].append(result) 30 | elif not is_human0 and not is_human1: 31 | type_to_bin['bb'].append(result) 32 | else: 33 | type_to_bin['hb'].append(result) 34 | self.type_to_bin = type_to_bin 35 | 36 | def chunking(self, _list, chunk_len): 37 | random.shuffle(_list) 38 | return [chunk for chunk in self.chunks(_list, chunk_len)] 39 | 40 | def create_chunks(self): 41 | n_chunks = len(self.type_to_bin['bb'])/self.chunk_size 42 | hh_chunk_len = math.ceil(self.chunk_size/self.ratio) #math.ceil((len(self.type_to_bin['hh']) + len(self.type_to_bin['hb']))/n_chunks) 43 | resample_hh = int(n_chunks*hh_chunk_len - len(self.type_to_bin['hh']) - len(self.type_to_bin['hb'])) 44 | human_convos = self.type_to_bin['hb'] + self.type_to_bin['hh'] 45 | bot_convos = self.type_to_bin['bb'] 46 | if resample_hh > 0: 47 | resampled_hh = [random.choice(self.type_to_bin['hh']) for _ in range(resample_hh)] 48 | human_convos.extend(resampled_hh) 49 | elif resample_hh < 0: 50 | human_convos = random.sample(human_convos, k=len(human_convos) + resample_hh) 51 | 52 | convo_id_to_segments = {} 53 | for convo in human_convos + bot_convos: 54 | #we pop then the segment from the top -> we want to have a random order 55 | sgm = [x for x in self.segments] 56 | random.shuffle(sgm) 57 | convo_id_to_segments[str(convo['_id'])] = sgm 58 | 59 | for _ in range(len(self.segments)): 60 | bb_chunks = self.chunking(bot_convos, self.chunk_size) 61 | human_chunks = self.chunking(human_convos, hh_chunk_len) 62 | 63 | assert len(bb_chunks) == len(human_chunks) 64 | 65 | random.shuffle(bb_chunks) 66 | random.shuffle(human_chunks) 67 | 68 | for bb_chunk, h_chunk in zip(bb_chunks, human_chunks): 69 | ret_chunk = [] 70 | for convo in bb_chunk + h_chunk: 71 | convo_id = str(convo['_id']) 72 | start_turn = convo_id_to_segments[convo_id].pop() 73 | if start_turn == -1: 74 | start_turn = len(convo['convo']) 75 | if start_turn > len(convo['convo']) - 1: 76 | continue 77 | ret_chunk.append({'convo_id': convo_id, 'start_turn': start_turn}) 78 | 79 | random.shuffle(ret_chunk) 80 | yield ret_chunk 81 | -------------------------------------------------------------------------------- /templates/src/segment_analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/src/segment_analysis/__init__.py -------------------------------------------------------------------------------- /templates/src/segment_analysis/annotation_statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Computes the statistics of the Annotators 3 | """ 4 | 5 | from templates.src.scoring_utils import labelled_collection 6 | from templates.src.segment_analysis.annotator_scores import get_package_annotations, get_all_annotated_convos 7 | import itertools 8 | 9 | def get_total_annotations(): 10 | return labelled_collection.count({}) 11 | 12 | def get_total_annotation_filtered(): 13 | annotation_id_blacklist = get_package_annotations() 14 | 15 | return labelled_collection.count({'_id': {'$nin': annotation_id_blacklist}}) 16 | 17 | 18 | def number_of_annotators(): 19 | results = labelled_collection.aggregate(pipeline=[ 20 | { 21 | "$group": { 22 | "_id": {"uid": "$user_name"}, 23 | "convos": {"$sum": 1} 24 | } 25 | } 26 | ]) 27 | 28 | return len(list(results)) 29 | 30 | 31 | def number_of_annotators_filtered(): 32 | annotation_id_blacklist = 
get_package_annotations() 33 | results = labelled_collection.aggregate(pipeline=[ 34 | { 35 | "$match": { 36 | '_id': {'$nin': annotation_id_blacklist} 37 | } 38 | }, 39 | { 40 | "$group": { 41 | "_id": {"uid": "$user_name"}, 42 | "convos": {"$sum": 1} 43 | } 44 | } 45 | ]) 46 | 47 | return len(list(results)) 48 | 49 | def average_time_per_package(): 50 | annotation_id_blacklist = get_package_annotations() 51 | 52 | results_start = labelled_collection.aggregate(pipeline=[ 53 | { 54 | "$match": { 55 | '_id': {'$nin': annotation_id_blacklist} 56 | } 57 | }, 58 | { 59 | "$group": { 60 | "_id": {"pid": "$package_id", "uid": "$user_name"}, 61 | "start_time": {"$min": "$start_time"} 62 | } 63 | } 64 | ]) 65 | 66 | results_end = labelled_collection.aggregate(pipeline=[ 67 | { 68 | "$match": { 69 | '_id': {'$nin': annotation_id_blacklist} 70 | } 71 | }, 72 | { 73 | "$group": { 74 | "_id": {"pid": "$package_id", "uid": "$user_name"}, 75 | "end_time": {"$max": "$end_time"} 76 | } 77 | } 78 | ]) 79 | 80 | min_time_for_id = {(res['_id']['pid'], res['_id']['uid']): res['start_time'] for res in results_start} 81 | 82 | times_in_ms = [] 83 | for result in results_end: 84 | pid = result['_id']['pid'] 85 | uid = result['_id']['uid'] 86 | 87 | end_time = result['end_time'] 88 | 89 | if min_time_for_id.get((pid, uid), None) is not None: 90 | start_time = min_time_for_id[(pid, uid)] 91 | times_in_ms.append(end_time-start_time) 92 | 93 | sorted_times = sorted(times_in_ms) 94 | if (len(sorted_times)%2) == 0: 95 | idx = int(len(sorted_times)/2) 96 | return (sorted_times[idx] + sorted_times[idx-1])/2 97 | else: 98 | idx = int((len(sorted_times) - 1)/2) 99 | return sorted_times[idx] 100 | 101 | 102 | def median_time_per_annotation(): 103 | annotation_id_blacklist = get_package_annotations() 104 | 105 | results = labelled_collection.aggregate(pipeline=[ 106 | { 107 | "$match": { 108 | '_id': {'$nin': annotation_id_blacklist} 109 | } 110 | }, 111 | { 112 | "$group": { 113 | "_id": "dummy", 114 | "elapsed_times": {"$addToSet": "$elapsed_time"} 115 | } 116 | } 117 | ]) 118 | elapsed_times = list(results)[0]['elapsed_times'] 119 | sorted_elapsed_times = sorted(elapsed_times) 120 | 121 | if (len(sorted_elapsed_times) % 2) == 0: 122 | idx = int(len(sorted_elapsed_times) / 2) 123 | return (sorted_elapsed_times[idx] + sorted_elapsed_times[idx - 1]) / 2 124 | else: 125 | idx = int((len(sorted_elapsed_times) - 1) / 2) 126 | return sorted_elapsed_times[idx] 127 | 128 | 129 | def total_time(): 130 | annotation_id_blacklist = get_package_annotations() 131 | 132 | results = labelled_collection.aggregate(pipeline=[ 133 | { 134 | "$match": { 135 | '_id': {'$nin': annotation_id_blacklist} 136 | } 137 | }, 138 | { 139 | "$group": { 140 | "_id": "dummy", 141 | "total_time": {"$sum": "$elapsed_time"} 142 | } 143 | } 144 | ]) 145 | total_time = list(results)[0]['total_time'] 146 | #convert to hrs 147 | return total_time/1000/60/60 148 | 149 | 150 | def annotator_agreement(feature='is_human'): 151 | convo_id_to_convo = get_all_annotated_convos(ignore_humans=False) 152 | total = 0 153 | agreements = 0 154 | for cid, convo in convo_id_to_convo.items(): 155 | annotations = convo['annotations'] 156 | for tid, annotation_list in annotations.items(): 157 | combos = list(itertools.combinations(annotation_list, 2)) 158 | for c1, c2 in combos: 159 | if feature=='is_human': 160 | p0 = c1['entity0_annotation'][feature] 161 | p1 = c2['entity0_annotation'][feature] 162 | agreements += int(p0 == p1) 163 | 164 | p0 = c1['entity1_annotation'][feature] 
165 | p1 = c2['entity1_annotation'][feature] 166 | agreements += int(p0 == p1) 167 | total += 2 168 | else: 169 | entity0_annotation1 = c1['entity0_annotation'][feature] 170 | entity0_annotation2 = c2['entity0_annotation'][feature] 171 | 172 | entity1_annotation1 = c1['entity1_annotation'][feature] 173 | entity1_annotation2 = c2['entity1_annotation'][feature] 174 | 175 | agreements += int(entity0_annotation1 == entity0_annotation2 and entity1_annotation1 == entity1_annotation2) 176 | total += 1 177 | 178 | return agreements/total 179 | 180 | if __name__ == "__main__": 181 | print('Number of Annotations Raw:\t{}'.format(get_total_annotations())) 182 | print('Number of Annotations Filtered:\t{}'.format(get_total_annotation_filtered())) 183 | print('Number of Annotators:\t{}'.format(number_of_annotators())) 184 | print('Number of Annotators Filtered:\t{}'.format(number_of_annotators_filtered())) 185 | print('Median Time per HIT:\t{}'.format(average_time_per_package())) 186 | print('Median Time per Annotation:\t{}'.format(median_time_per_annotation())) 187 | print('Total Time:\t{}'.format(total_time())) 188 | print('Agreement for Spot The Bot:\t{}'.format(annotator_agreement())) 189 | print('Agreement for Fluency:\t{}'.format(annotator_agreement(feature='fluencyValue'))) 190 | print('Agreement for Sensibleness:\t{}'.format(annotator_agreement(feature='sensitivenessValue'))) 191 | print('Agreement for Specificity:\t{}'.format(annotator_agreement(feature='specificityValue'))) -------------------------------------------------------------------------------- /templates/src/segment_analysis/annotator_scores.py: -------------------------------------------------------------------------------- 1 | """ 2 | Computes how well the annotators perform the task. 3 | """ 4 | 5 | from templates.src.scoring_utils import labelled_collection, sampled_collection, package_collection 6 | from bson import ObjectId 7 | from collections import defaultdict, Counter 8 | user_black_list = [''] 9 | 10 | 11 | def get_all_annotated_convos(ignore_humans=False, apply_blacklist=True): 12 | if apply_blacklist: 13 | annotation_id_blacklist = get_package_annotations() 14 | else: 15 | annotation_id_blacklist = [''] 16 | annotation_results = [res for res in labelled_collection.find({'user_name': {'$nin': user_black_list}, '_id': {'$nin': annotation_id_blacklist}})] 17 | oids = [ObjectId(ann['convo_id']) for ann in annotation_results] 18 | convo_results = sampled_collection.find({'_id': {'$in': oids}}) 19 | 20 | convo_id_to_convo = {} # make it searchable 21 | for convo in convo_results: 22 | system_name0 = convo['system_type0'] 23 | system_name1 = convo['system_type1'] 24 | 25 | if ignore_humans and (system_name0 == 'human' or system_name1 == 'human'): 26 | #print('Ignore Human') 27 | continue 28 | else: 29 | convo['annotations'] = defaultdict(lambda: []) 30 | convo_id_to_convo[str(convo['_id'])] = convo 31 | 32 | for annotation in annotation_results: 33 | annotation['_id'] = str(annotation['_id']) 34 | convo_id = annotation['convo_id'] 35 | convo = convo_id_to_convo.get(convo_id, None) 36 | if convo is not None: 37 | turn_nr = int((annotation['entity0_annotation']['decision_turn'])/2) 38 | convo['annotations'][turn_nr].append(annotation) 39 | 40 | return convo_id_to_convo 41 | 42 | def get_package_annotations(): 43 | results = labelled_collection.aggregate(pipeline=[ 44 | { 45 | "$group": { 46 | "_id": {"pid": "$package_id", "uid": "$user_name"}, 47 | "convos": {"$addToSet": "$convo_id"} 48 | } 49 | } 50 | ]) 51 | 52 | pres = 
package_collection.find({}) 53 | pid_to_cids = {str(p['_id']): set([c['convo_id'] for c in p['package'] ])for p in pres} 54 | 55 | annotation_blacklist = [] 56 | for res in results: 57 | pid = res['_id']['pid'] 58 | convo_ids = set(res['convos']) 59 | if pid_to_cids.get(pid, None) is None: 60 | continue 61 | expected_ids = set(pid_to_cids[pid]) 62 | 63 | if not convo_ids == expected_ids: 64 | annotation_blacklist.append(res['_id']) 65 | 66 | annotation_id_blacklist = [] 67 | for entry in annotation_blacklist: 68 | annotations = labelled_collection.find({'package_id': entry['pid'], 'user_name': entry['uid']}) 69 | aids = [annotation['_id'] for annotation in annotations] 70 | 71 | annotation_id_blacklist.extend(aids) 72 | 73 | return annotation_id_blacklist 74 | 75 | 76 | def compute_agreement_for_annotation(annotations, feature='is_human'): 77 | ent0_ann = set() 78 | ent1_ann = set() 79 | for annotation in annotations: 80 | ann0 = annotation['entity0_annotation'][feature] 81 | ann1 = annotation['entity1_annotation'][feature] 82 | 83 | ent0_ann.add(ann0) 84 | ent1_ann.add(ann1) 85 | #we expect that if all agree, that a set has only one element 86 | agreement0 = int(len(ent0_ann) == 1) 87 | agreement1 = int(len(ent1_ann) == 1) 88 | 89 | #agreement score, number of annotations 90 | return agreement0 + agreement1, 2 91 | 92 | 93 | def compute_convo_agreement(convo_id_to_convo, feature='is_human'): 94 | agreement_score, total_annotations = 0, 0 95 | for cid, convo in convo_id_to_convo.items(): 96 | for tid, annotations in convo['annotations'].items(): 97 | ascore, n = compute_agreement_for_annotation(annotations, feature) 98 | agreement_score += ascore 99 | total_annotations += n 100 | return agreement_score, total_annotations, agreement_score/total_annotations 101 | 102 | 103 | def annotator_score(convo_id_to_convo): 104 | correctness_score = defaultdict(lambda : [0, 0]) 105 | for cid, convo in convo_id_to_convo.items(): 106 | is_human0 = convo['is_human0'] 107 | is_human1 = convo['is_human1'] 108 | for tid, annotations in convo['annotations'].items(): 109 | for annotation in annotations: 110 | ann0 = annotation['entity0_annotation']['is_human'] 111 | ann1 = annotation['entity1_annotation']['is_human'] 112 | user_name = annotation['user_name'] 113 | if ann0 is not None: 114 | correctness_score[user_name][0] += int(ann0 == is_human0) 115 | correctness_score[user_name][1] += 1 116 | if ann1 is not None: 117 | correctness_score[user_name][0] += int(ann1 == is_human1) 118 | correctness_score[user_name][1] += 1 119 | 120 | correctness_rate_for_user = {} 121 | for user, scores in correctness_score.items(): 122 | correctness_rate_for_user[user] = scores[0]/scores[1] 123 | return correctness_rate_for_user 124 | 125 | 126 | def label_distribution_for_annotator(convo_id_to_convo): 127 | distribution_for_user = defaultdict(lambda: defaultdict(lambda : 0)) 128 | for cid, convo in convo_id_to_convo.items(): 129 | for tid, annotations in convo['annotations'].items(): 130 | for annotation in annotations: 131 | ann0 = annotation['entity0_annotation']['is_human'] 132 | ann1 = annotation['entity1_annotation']['is_human'] 133 | user_name = annotation['user_name'] 134 | distribution_for_user[user_name][ann0] += 1 135 | distribution_for_user[user_name][ann1] += 1 136 | 137 | distribution_rate_for_user = defaultdict(lambda: defaultdict(lambda : 0)) 138 | for user, ratio in distribution_for_user.items(): 139 | total = sum(ratio.values()) 140 | for label, val in ratio.items(): 141 | r = val/total 142 | 
distribution_rate_for_user[user][label] = r 143 | return distribution_rate_for_user 144 | 145 | 146 | def unfinished_packets(): 147 | annotation_id_blacklist = get_package_annotations() 148 | annotation_results = [res for res in labelled_collection.find({'_id': {'$nin': annotation_id_blacklist}})] 149 | package_ids = set([(res['package_id'], res['user_name'])for res in annotation_results]) 150 | expected_pids = set([str(res['_id']) for res in package_collection.find({})]) 151 | 152 | pid_cnt = Counter() 153 | for pid, uid in package_ids: 154 | pid_cnt.update([pid]) 155 | for pid, cnt in pid_cnt.items(): 156 | if cnt != 2: 157 | print(pid, cnt) 158 | rest = expected_pids.difference(pid_cnt.keys()) 159 | for pid in rest: 160 | print(pid, 0) 161 | 162 | if __name__ == "__main__": 163 | annotation_id_blacklist = get_package_annotations() 164 | convo_id_to_convo = get_all_annotated_convos(ignore_humans=False) 165 | print(compute_convo_agreement(convo_id_to_convo)) 166 | print(compute_convo_agreement(convo_id_to_convo, feature='fluencyValue')) 167 | print(compute_convo_agreement(convo_id_to_convo, feature='sensitivenessValue')) 168 | print(compute_convo_agreement(convo_id_to_convo, feature='specificityValue')) 169 | correctness_rate_for_user = annotator_score(convo_id_to_convo) 170 | for user, rate in correctness_rate_for_user.items(): 171 | print(user, rate) 172 | 173 | distribution_rate_for_user = label_distribution_for_annotator(convo_id_to_convo) 174 | for user, rate in distribution_rate_for_user.items(): 175 | print(user, rate) 176 | 177 | unfinished_packets() -------------------------------------------------------------------------------- /templates/src/segment_analysis/fooling_analysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates the plots of how often a bot fools a human. 
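Typical use, as a sketch of the __main__ block at the bottom of this file (it assumes the MongoDB
collections configured in templates.src.mongo_client are reachable):

    convo_id_to_convo = get_all_annotated_convos(ignore_humans=False)
    df = decision_distribution(convo_id_to_convo)    # one row per annotated speaker and decision turn (Human/Bot/Undecided)
    rates = compute_class_ratio(convo_id_to_convo)   # per-system, per-segment decision ratios
    # seaborn catplots are then written to figures/<SAMPLED_COLLECTION_NAME>/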
3 | """ 4 | 5 | import seaborn as sns 6 | from templates.src.segment_analysis.win_function import get_all_annotated_convos 7 | from collections import defaultdict, Counter 8 | from pandas import DataFrame 9 | import numpy as np 10 | from templates.src.mongo_client import SAMPLED_COLLECTION_NAME 11 | from pathlib import Path 12 | 13 | name_mapping_domain = { 14 | 'personachat': { 15 | 'human': 'Human', 16 | 'model': 'BL', 17 | 'lost_in_conversation': 'LC', 18 | 'bert_rank': 'BR', 19 | 'kvmemnn': 'KV', 20 | 'huggingface': 'HF', 21 | 'suckybot': 'DR' 22 | }, 23 | 'dailydialog': { 24 | 'human': 'Human', 25 | 'seq2seq_att': 'S2', 26 | 'bert_rank': 'BR', 27 | 'huggingface': 'GPT', 28 | 'suckybot': 'DR' 29 | }, 30 | 'empathetic_dialogues': { 31 | 'human': 'Human', 32 | 'model': 'BL', 33 | 'seq2seq_att': 'S2', 34 | 'bert_rank': 'BR', 35 | 'huggingface': 'GPT', 36 | 'suckybot': 'DR' 37 | }, 38 | 'sota':{ 39 | 'Human': 'Human', 40 | 'Generative2.7b_bst_0331': 'BL', 41 | 'Meena': 'ME', 42 | 'Cleverbot': 'CL', 43 | 'Mitsuku': 'MI', 44 | } 45 | } 46 | 47 | 48 | def decision_distribution(convo_id_to_convo): 49 | distribution = defaultdict(lambda: []) 50 | for cid, convo in convo_id_to_convo.items(): 51 | domain = convo['domain_name'] 52 | system_name0 = convo['system_type0'] 53 | system_name1 = convo['system_type1'] 54 | 55 | 56 | 57 | system_name0 = name_mapping_domain[domain][system_name0] 58 | system_name1 = name_mapping_domain[domain][system_name1] 59 | 60 | for tid, annotations in convo['annotations'].items(): 61 | for annotation in annotations: 62 | human0_pred = annotation['entity0_annotation']['is_human'] 63 | human1_pred = annotation['entity1_annotation']['is_human'] 64 | 65 | distribution['system_name'].append(system_name0) 66 | distribution['system_name'].append(system_name1) 67 | distribution['segment'].append(tid) 68 | distribution['segment'].append(tid) 69 | 70 | if human0_pred is True: 71 | distribution['is_human'].append('Human') 72 | distribution['is_human_val'].append(1) 73 | elif human0_pred is False: 74 | distribution['is_human'].append('Bot') 75 | distribution['is_human_val'].append(1) 76 | else: 77 | distribution['is_human'].append('Undecided') 78 | distribution['is_human_val'].append(1) 79 | 80 | if human1_pred is True: 81 | distribution['is_human'].append('Human') 82 | distribution['is_human_val'].append(1) 83 | elif human1_pred is False: 84 | distribution['is_human'].append('Bot') 85 | distribution['is_human_val'].append(0) 86 | else: 87 | distribution['is_human'].append('Undecided') 88 | distribution['is_human_val'].append(1) 89 | 90 | df = DataFrame.from_dict(distribution) 91 | return df 92 | 93 | 94 | def compute_class_ratio(convo_id_to_convo): 95 | distribution = defaultdict(lambda: defaultdict(lambda: Counter())) 96 | for cid, convo in convo_id_to_convo.items(): 97 | domain = convo['domain_name'] 98 | system_name0 = convo['system_type0'] 99 | system_name1 = convo['system_type1'] 100 | 101 | if system_name0 == 'Human': 102 | system_name0 = 'Human_{}'.format(system_name1) 103 | if system_name1 == 'Human': 104 | system_name1 = 'Human_{}'.format(system_name0) 105 | 106 | system_name0 = name_mapping_domain[domain].get(system_name0, system_name0) 107 | system_name1 = name_mapping_domain[domain].get(system_name1, system_name1) 108 | 109 | for tid, annotations in convo['annotations'].items(): 110 | for annotation in annotations: 111 | human0_pred = annotation['entity0_annotation']['is_human'] 112 | human1_pred = annotation['entity1_annotation']['is_human'] 113 | 114 | if 
human0_pred is True: 115 | distribution[system_name0][tid].update(['Human']) 116 | elif human0_pred is False: 117 | distribution[system_name0][tid].update(['Bot']) 118 | else: 119 | distribution[system_name0][tid].update(['Undecided']) 120 | 121 | if human1_pred is True: 122 | distribution[system_name1][tid].update(['Human']) 123 | elif human1_pred is False: 124 | distribution[system_name1][tid].update(['Bot']) 125 | else: 126 | distribution[system_name1][tid].update(['Undecided']) 127 | 128 | flat_distr = defaultdict(lambda: []) 129 | for system_name, tid_to_distr in distribution.items(): 130 | for tid, distr in tid_to_distr.items(): 131 | total = sum(distr.values()) 132 | for cls, cnt in distr.items(): 133 | distr[cls] = cnt / total 134 | 135 | flat_distr['system_name'].append(system_name) 136 | flat_distr['segment'].append(tid) 137 | flat_distr['type'].append(cls) 138 | flat_distr['val'].append(cnt / total) 139 | 140 | flat_df = DataFrame.from_dict(flat_distr) 141 | return flat_df 142 | 143 | 144 | def spotting_rate(convo_id_to_convo): 145 | n_spotted = defaultdict(lambda: defaultdict(lambda: [0, 0])) 146 | for cid, convo in convo_id_to_convo.items(): 147 | domain = convo['domain_name'] 148 | system_name0 = convo['system_type0'] 149 | system_name1 = convo['system_type1'] 150 | 151 | system_name0 = name_mapping_domain[domain][system_name0] 152 | system_name1 = name_mapping_domain[domain][system_name1] 153 | tid_to_annotations = convo['annotations'] 154 | spotted0 = False 155 | spotted1 = False 156 | for tid, anntoations in sorted(tid_to_annotations.items()): 157 | for annotation in anntoations: 158 | human0_pred = annotation['entity0_annotation']['is_human'] 159 | human1_pred = annotation['entity1_annotation']['is_human'] 160 | if not human0_pred: 161 | spotted0 = True 162 | if not human1_pred: 163 | spotted1 = True 164 | if spotted0: 165 | n_spotted[system_name0][tid][0] += 1 166 | if spotted1: 167 | n_spotted[system_name1][tid][0] += 1 168 | 169 | n_spotted[system_name0][tid][1] += 1 170 | n_spotted[system_name1][tid][1] += 1 171 | 172 | for system_name, tid_to_rate in n_spotted.items(): 173 | for tid, rate in tid_to_rate.items(): 174 | print(system_name, tid, rate[0] / rate[1]) 175 | 176 | 177 | def conditioned_fooling_rate(): 178 | convo_id_to_convo = get_all_annotated_convos(ignore_humans=True) 179 | 180 | conditioned_fooling_for_pair = defaultdict(lambda: [0, 0]) 181 | for cid, convo in convo_id_to_convo.items(): 182 | domain = convo['domain_name'] 183 | system_name0 = convo['system_type0'] 184 | system_name1 = convo['system_type1'] 185 | 186 | system_name0 = name_mapping_domain[domain][system_name0] 187 | system_name1 = name_mapping_domain[domain][system_name1] 188 | tid_to_annotations = convo['annotations'] 189 | for tid, annotations in tid_to_annotations.items(): 190 | for annotation in annotations: 191 | human0_pred = annotation['entity0_annotation']['is_human'] 192 | human1_pred = annotation['entity1_annotation']['is_human'] 193 | 194 | if human0_pred: 195 | conditioned_fooling_for_pair[system_name0, system_name1][0] += 1 196 | if human1_pred: 197 | conditioned_fooling_for_pair[system_name1, system_name0][0] += 1 198 | conditioned_fooling_for_pair[system_name0, system_name1][1] += 1 199 | conditioned_fooling_for_pair[system_name1, system_name0][1] += 1 200 | return conditioned_fooling_for_pair 201 | 202 | 203 | if __name__ == "__main__": 204 | convo_id_to_convo = get_all_annotated_convos(ignore_humans=False) 205 | # spotting_rate(convo_id_to_convo) 206 | 207 | 
conditioned_fooling_for_pair = conditioned_fooling_rate() 208 | for pair, rate in conditioned_fooling_for_pair.items(): 209 | print('{}\t{}\t{}\t{}\t{}'.format(pair[0], pair[1], rate[0], rate[1], rate[0] / rate[1])) 210 | 211 | Path("figures/{}".format(SAMPLED_COLLECTION_NAME)).mkdir(parents=True, exist_ok=True) 212 | 213 | dataframe = decision_distribution(convo_id_to_convo) 214 | sns_plt = sns.catplot(x='segment', hue='is_human', col='system_name', kind='count', col_wrap=3, data=dataframe) 215 | sns_plt.savefig('figures/{}/turn_distribtion'.format(SAMPLED_COLLECTION_NAME)) 216 | 217 | dataframe = compute_class_ratio(convo_id_to_convo) 218 | print(dataframe.head()) 219 | sns_plt = sns.catplot(x='segment', y='val', hue='type', col='system_name', kind='bar', col_wrap=3, data=dataframe) 220 | sns_plt.savefig('figures/{}/turn_distribtion_rates'.format(SAMPLED_COLLECTION_NAME)) 221 | 222 | sns_plt = sns.catplot(x='system_name', y='val', hue='type', kind='bar', data=dataframe, legend_out=False, 223 | palette=sns.color_palette(n_colors=3)) 224 | sns_plt.set_axis_labels("", "Fooling Rates").set(ylim=(0, 1.0)).despine(left=True) 225 | sns_plt.savefig('figures/{}/fooling_rates'.format(SAMPLED_COLLECTION_NAME)) 226 | 227 | sns_plt = sns.catplot(x='system_name', y='val', hue='segment', kind='bar', 228 | data=dataframe[dataframe.type == 'Human'], legend_out=False, 229 | palette=sns.color_palette(n_colors=3)) 230 | sns_plt.set_axis_labels("", "Fooling Rates").set(ylim=(0, 1.0)).despine(left=True) 231 | sns_plt.savefig('figures/{}/fooling_rates_over_time'.format(SAMPLED_COLLECTION_NAME)) 232 | 233 | human_pred_rates = dataframe[dataframe.type == 'Human'] 234 | human_pred_rates = human_pred_rates.sort_values(['val'], ascending=False).reset_index(drop=True) 235 | sns_plt = sns.catplot(x='system_name', y='val', kind='bar', data=human_pred_rates, legend_out=False, color='royalblue') 236 | sns_plt.set_axis_labels("", "Predicted as Human").set(ylim=(0, 1.0)).despine(left=True) 237 | sns_plt.savefig('figures/{}/human_pred_rates'.format(SAMPLED_COLLECTION_NAME)) 238 | 239 | segments = np.sort(dataframe.segment.unique()) 240 | for system_name in dataframe.system_name.unique(): 241 | for segment_len in segments: 242 | human = dataframe[(dataframe.system_name == system_name) & (dataframe.segment == segment_len) & ( 243 | dataframe.type == 'Human')] 244 | human_rate = float(human['val']) if human.size > 0 else 0.0 245 | 246 | bot = dataframe[ 247 | (dataframe.system_name == system_name) & (dataframe.segment == segment_len) & (dataframe.type == 'Bot')] 248 | bot_rate = float(bot['val']) if bot.size > 0 else 0.0 249 | 250 | undecided = dataframe[(dataframe.system_name == system_name) & (dataframe.segment == segment_len) & ( 251 | dataframe.type == 'Undecided')] 252 | undecided_rate = float(undecided['val']) if undecided.size > 0 else 0.0 253 | print('{}\t{}\t{}\t{}\t{}'.format(system_name, segment_len, human_rate, bot_rate, undecided_rate)) 254 | print('\n\n') 255 | -------------------------------------------------------------------------------- /templates/src/segment_analysis/ranking_significance.py: -------------------------------------------------------------------------------- 1 | from templates.src.segment_analysis.win_significance import sample_convos_for_bot_pair 2 | from templates.src.mongo_client import SAMPLED_COLLECTION_NAME 3 | from templates.src.segment_analysis.segmented_bootstrap_sampling import bootstrap_sampling, get_all_annotated_convos, create_set_of_matches 4 | from itertools import combinations 5 
| from collections import defaultdict, Counter 6 | from tqdm import tqdm 7 | import multiprocessing 8 | import argparse 9 | import os 10 | import itertools 11 | 12 | 13 | from datetime import datetime 14 | 15 | convo_id_to_convo = get_all_annotated_convos(ignore_humans=True) 16 | 17 | def get_list_of_systems(): 18 | systems = set() 19 | for convo in convo_id_to_convo.values(): 20 | s0 = convo['system_type0'] 21 | s1 = convo['system_type1'] 22 | 23 | systems.update([s0, s1]) 24 | return systems 25 | 26 | def range_overlap(range0, range1): 27 | if range0[1] < range1[0] or range1[1] < range0[0]: 28 | return False 29 | else: 30 | return True 31 | 32 | def get_range_cluster(overlap_for_pair, system, assigned_systems): 33 | cluster = set() 34 | overlapping_systems = get_overlapping_systems_for_system(overlap_for_pair, system) 35 | #if there is no change in the assigned systems return 36 | if overlapping_systems.union(assigned_systems) == assigned_systems: 37 | return overlapping_systems 38 | assigned_systems = overlapping_systems.union(assigned_systems) 39 | cluster = overlapping_systems.union(cluster) 40 | for osystem in overlapping_systems: 41 | overlapping_systems = get_range_cluster(overlap_for_pair, osystem, assigned_systems) 42 | assigned_systems = assigned_systems.union(overlapping_systems) 43 | cluster = overlapping_systems.union(cluster) 44 | return cluster 45 | 46 | 47 | def get_overlapping_systems_for_system(overlap_for_pair, system): 48 | overlapping_systems = set() 49 | for other_system, overlap in overlap_for_pair[system].items(): 50 | if overlap: 51 | overlapping_systems.add(other_system) 52 | return overlapping_systems 53 | 54 | 55 | def create_clusters(rank_range_for_system): 56 | overlap_for_pair = defaultdict(lambda : defaultdict(lambda : False)) 57 | systems = list(rank_range_for_system.keys()) 58 | lower_bound_to_bots = defaultdict(lambda : set()) 59 | for system0, system1 in combinations(systems, r=2): 60 | range0 = rank_range_for_system[system0] 61 | range1 = rank_range_for_system[system1] 62 | overlap = range_overlap(range0, range1) 63 | overlap_for_pair[system0][system1] = overlap 64 | overlap_for_pair[system1][system0] = overlap 65 | lower_bound_to_bots[range0[0]].add(system0) 66 | lower_bound_to_bots[range1[0]].add(system1) 67 | 68 | assigned_systems = set() 69 | clusters = [] 70 | for i in range(len(systems)): 71 | range_systems = lower_bound_to_bots.get(i, None) 72 | if range_systems is None: 73 | continue 74 | overlapping_systems = set() 75 | for system in range_systems: 76 | cluster = get_range_cluster(overlap_for_pair, system, {s for s in assigned_systems}) 77 | cluster.add(system) 78 | if assigned_systems.union(cluster) == assigned_systems: 79 | continue 80 | assigned_systems = assigned_systems.union(cluster) 81 | overlapping_systems = overlapping_systems.union(cluster) 82 | 83 | if len(overlapping_systems) > 0: 84 | clusters.append(overlapping_systems) 85 | return clusters 86 | 87 | 88 | def reduced_dict(convo_id_to_convo, system_names): 89 | reduced_cid_to_convo = {} 90 | for cid, convo in convo_id_to_convo.items(): 91 | if convo['system_type0'] in system_names or convo['system_type1'] in system_names: 92 | continue 93 | 94 | reduced_cid_to_convo[cid] = convo 95 | return reduced_cid_to_convo 96 | 97 | def leave_out_significance(args): 98 | repetitions, rep, n_samples, trueskill, naive, leave_n_out, feature, ps = args 99 | system_names = get_list_of_systems() 100 | 101 | #all combinations of system-sets to leave out 102 | combos = 
list(itertools.combinations(system_names, leave_n_out)) 103 | date_time = datetime.now().strftime("%Y%m%d-%H%M%S") 104 | 105 | with open(os.path.join('data', 'rank_significance', 'output-{}.txt'.format(date_time)), 'wt', encoding='utf-8') as ofile: 106 | ofile.write('Sig Repetitions\t{}\n'.format(sig_repetitions)) 107 | ofile.write('Sample Repetitions\t{}\n'.format(sample_repetitions)) 108 | ofile.write('Number of Samples\t{}\n'.format(n_samples)) 109 | ofile.write('Use TrueSkill\t{}\n'.format(trueskill)) 110 | ofile.write('Use Naive\t{}\n'.format(naive)) 111 | ofile.write('Levae Out\t{}\n'.format(leave_n_out)) 112 | ofile.write('Pool Size\t{}\n'.format(ps)) 113 | ofile.write('Feature\t{}\n'.format(feature)) 114 | ofile.write('Domain\t{}\n'.format(SAMPLED_COLLECTION_NAME)) 115 | 116 | for system_subset in combos: 117 | ofile.write(', '.join(list(system_subset)) + '\n') 118 | ofile.flush() 119 | rep_args = [(i, repetitions, rep, n_samples, trueskill, naive, system_subset, feature) for i in range(3, 45, 1)] 120 | pool = multiprocessing.Pool(ps) 121 | outputs = pool.map(repeat_ranking, rep_args) 122 | pool.close() 123 | for op in outputs: 124 | ofile.write('{}\t{}\n'.format(op[0], op[1])) 125 | ofile.flush() 126 | 127 | 128 | def repeat_ranking(args): 129 | sij, repetitions, rep, n_samples, trueskill, naive, leave_out, feature = args 130 | print('Start:', sij) 131 | repetitions = repetitions 132 | cluster_count = Counter() 133 | for _ in tqdm(range(repetitions)): 134 | if leave_out is not None: 135 | reduced_convo_id_to_convo = reduced_dict(convo_id_to_convo, set(leave_out)) 136 | sampled_convo_id_to_convo = sample_convos_for_bot_pair(reduced_convo_id_to_convo, sij) 137 | else: 138 | sampled_convo_id_to_convo = sample_convos_for_bot_pair(convo_id_to_convo, sij) 139 | matches = create_set_of_matches(sampled_convo_id_to_convo, feature=feature) 140 | #matches = create_naive_matches(sampled_convo_id_to_convo, feature=None) 141 | rank_range_for_system, _, _ = bootstrap_sampling(matches, sampled_convo_id_to_convo, compute_scores=False, naive=naive, feature=feature, trueskill=trueskill, rep=rep, n_samples=n_samples) 142 | cluster = create_clusters(rank_range_for_system) 143 | hashable_cluster = tuple([tuple(sorted(cl)) for cl in cluster]) 144 | cluster_count.update([hashable_cluster]) 145 | most_common, n = sorted(cluster_count.items(), key=lambda x: x[1], reverse=True)[0] 146 | print('{}:\t{}\t{}'.format(sij, most_common, n/repetitions)) 147 | print('End:',sij) 148 | return sij, n / repetitions 149 | 150 | if __name__ == "__main__": 151 | parser = argparse.ArgumentParser(description='Compute Ranking Significance with respect to Number of Pairwise Conversations.') 152 | parser.add_argument('-r', '--repetitions', dest='repetitions', type=int, default=1000) 153 | parser.add_argument('-sr', '--sample-repetitions', dest='sample_repetitions', type=int, default=1000) 154 | parser.add_argument('-ns', '--number-samples', dest='n_samples', type=int, default=5000) 155 | parser.add_argument('-ts', '--true-skill', dest='trueskill', type=bool, default=False) 156 | parser.add_argument('-nv', '--naive', dest='naive', type=bool, default=False) 157 | parser.add_argument('-lo', '--leave-one-out', dest='loo', type=int, default=0) 158 | parser.add_argument('-f', '--feature', dest='feature', type=str, default=None) 159 | parser.add_argument('-ps', '--pool-size', dest='pool_size', type=int, default=2) 160 | args = parser.parse_args() 161 | 162 | sig_repetitions = args.repetitions 163 | sample_repetitions = 
args.sample_repetitions 164 | n_samples = args.n_samples 165 | trueskill = args.trueskill 166 | naive = args.naive 167 | loo = args.loo 168 | ps = args.pool_size 169 | feature = args.feature 170 | 171 | if loo == 0: 172 | #repeat_ranking(30) 173 | rep_args = [(i, sig_repetitions, sample_repetitions, n_samples, trueskill, naive, None, feature) for i in 174 | range(3, 45, 1)] 175 | pool = multiprocessing.Pool(ps) 176 | outputs = pool.map(repeat_ranking, rep_args) 177 | date_time = datetime.now().strftime("%Y%m%d-%H%M%S") 178 | with open(os.path.join('data', 'rank_significance', 'output-{}.txt'.format(date_time)), 'wt', encoding='utf-8') as ofile: 179 | ofile.write('Sig Repetitions\t{}\n'.format(sig_repetitions)) 180 | ofile.write('Sample Repetitions\t{}\n'.format(sample_repetitions)) 181 | ofile.write('Number of Samples\t{}\n'.format(n_samples)) 182 | ofile.write('Use TrueSkill\t{}\n'.format(trueskill)) 183 | ofile.write('Use Naive\t{}\n'.format(naive)) 184 | ofile.write('Pool Size\t{}\n'.format(ps)) 185 | ofile.write('Feature\t{}\n'.format(feature)) 186 | ofile.write('Domain\t{}\n'.format(SAMPLED_COLLECTION_NAME)) 187 | for op in outputs: 188 | ofile.write('{}\t{}\n'.format(op[0], op[1])) 189 | else: 190 | leave_out_significance((sig_repetitions, sample_repetitions, n_samples, trueskill, naive, loo, feature, ps)) -------------------------------------------------------------------------------- /templates/src/segment_analysis/segmented_bootstrap_sampling.py: -------------------------------------------------------------------------------- 1 | from templates.src.segment_analysis.win_function import compute_winner_for_convo, compute_pairwise_wins, \ 2 | compute_naive_winner_for_feature_annotation, compute_naive_winner_for_ssa_annotation, \ 3 | compute_naive_winner_for_annotation, compute_naive_win_rate 4 | from templates.src.segment_analysis.annotator_scores import get_all_annotated_convos 5 | from collections import defaultdict, Counter 6 | import random 7 | from trueskill import rate_1vs1, TrueSkill 8 | import numpy as np 9 | import trueskill 10 | 11 | 12 | def compute_rank_range(rank_count_for_system, p=950): 13 | rank_range_for_system = {} 14 | for system, rank_count in rank_count_for_system.items(): 15 | min_rank = min(list(rank_count.keys())) 16 | max_rank = max(list(rank_count.keys())) 17 | range_to_count = {} 18 | for lower_bound in range(min_rank, max_rank + 1, 1): 19 | for upper_bound in range(lower_bound, max_rank + 1): 20 | range_to_count[lower_bound, upper_bound] = 0 21 | for i in range(lower_bound, upper_bound + 1): 22 | range_to_count[lower_bound, upper_bound] += rank_count[i] 23 | filtered_ranges = [rng for rng, cnt in range_to_count.items() if cnt > p] 24 | 25 | len_sorting = sorted(filtered_ranges, key=lambda x: x[1] - x[0]) 26 | rank_sorted = sorted(len_sorting, key=lambda x: x[0]) 27 | # best range? smallest range? 
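        # the double sort is stable: rank_sorted[0] is the range with the smallest
        # lower bound and, among ranges sharing that lower bound, the narrowest one
        # whose accumulated rank count exceeds p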
28 | best_rank = rank_sorted[0] 29 | rank_range_for_system[system] = best_rank 30 | return rank_range_for_system 31 | 32 | 33 | def compute_win_rate_for_system(sampled_matches): 34 | ranking_for_sample = defaultdict(lambda: [0, 0]) 35 | for match in sampled_matches: 36 | system_name0, system_name1, win0, win1 = match 37 | ranking_for_sample[system_name0][0] += win0 38 | ranking_for_sample[system_name1][0] += win1 39 | ranking_for_sample[system_name0][1] += 1 40 | ranking_for_sample[system_name1][1] += 1 41 | 42 | win_rates = {system_type: x[0] / x[1] for system_type, x in ranking_for_sample.items()} 43 | ranking = sorted(win_rates.items(), key=lambda x: x[1], reverse=True) 44 | return ranking 45 | 46 | 47 | def compute_trueskill_ranking(matches): 48 | env = TrueSkill(0, 0.5) 49 | env.beta = 0.025 * (0.5 ** 2) * len(matches) 50 | env.tau = 0 51 | player_objects = defaultdict(lambda: env.create_rating()) 52 | for match in matches: 53 | system_name0, system_name1, win0, win1 = match 54 | player0 = player_objects[system_name0] 55 | player1 = player_objects[system_name1] 56 | 57 | if win0 == 1 and win1 == 0: 58 | new_player0, new_player1 = rate_1vs1(player0, player1) 59 | elif win1 == 1 and win0 == 0: 60 | new_player1, new_player0 = rate_1vs1(player1, player0) 61 | else: 62 | new_player0, new_player1 = rate_1vs1(player0, player1, drawn=True) 63 | 64 | player_objects[system_name0] = new_player0 65 | player_objects[system_name1] = new_player1 66 | ranking = sorted(player_objects.items(), key=lambda x: x[1].mu, reverse=True) 67 | return ranking 68 | 69 | 70 | def bootstrap_sampling(matches, convo_id_to_convo, compute_scores=True, naive=False, feature=None, trueskill=False, rep=1000, n_samples=5000): 71 | rank_count_for_system = defaultdict(lambda: Counter()) 72 | trueskill_scores_for_system = defaultdict(lambda: 0) 73 | scores_for_system = defaultdict(lambda: 0) 74 | for i in range(rep): 75 | if not trueskill: 76 | #rand_ids = np.random.choice(len(matches), size=n_samples, replace=True) 77 | rand_ids = np.random.randint(0, len(matches), size=n_samples) 78 | sample = [matches[rid] for rid in rand_ids] 79 | ranking = compute_win_rate_for_system(sample) 80 | else: 81 | random.shuffle(matches) 82 | #sampled_matches = random.sample(matches, k=n_samples) 83 | ranking = compute_trueskill_ranking(matches) 84 | 85 | for rank, (system, rate) in enumerate(ranking): 86 | rank_count_for_system[system].update([rank]) 87 | if trueskill: 88 | trueskill_scores_for_system[system] += rate.mu 89 | 90 | if trueskill: 91 | for system, sum_mu in trueskill_scores_for_system.items(): 92 | trueskill_scores_for_system[system] = sum_mu / rep 93 | 94 | if not naive: 95 | pairs_to_win_score = compute_pairwise_wins(convo_id_to_convo, feature=feature) 96 | else: 97 | pairs_to_win_score = compute_naive_win_rate(convo_id_to_convo, feature=feature) 98 | 99 | if compute_scores: 100 | system_names = sorted(list(set([x[0] for x in pairs_to_win_score.keys()]))) 101 | for system_name in system_names: 102 | for system_name_other in set(system_names).difference([system_name]): 103 | wins = pairs_to_win_score[system_name, system_name_other][0] 104 | losses = pairs_to_win_score[system_name_other, system_name][0] 105 | if not wins + losses == 0: 106 | scores_for_system[system_name] += (1 / (len(system_names) - 1)) * (wins / (wins + losses)) 107 | 108 | rank_range_for_system = compute_rank_range(rank_count_for_system, p=int(rep * 0.995)) 109 | return rank_range_for_system, scores_for_system, trueskill_scores_for_system 110 | 111 | 112 | def 
create_set_of_matches(convo_id_to_convo, feature=None, ignore_human=False): 113 | matches = [] 114 | for cid, convo in convo_id_to_convo.items(): 115 | system_name0 = convo['system_type0'] 116 | system_name1 = convo['system_type1'] 117 | 118 | if ignore_human: 119 | if system_name0 == 'human' or system_name1 == 'human': 120 | continue 121 | 122 | win_score = compute_winner_for_convo(convo, feature) 123 | if win_score == 1: 124 | matches.append((system_name0, system_name1, 1, 0)) 125 | elif win_score == -1: 126 | matches.append((system_name0, system_name1, 0, 1)) 127 | else: 128 | matches.append((system_name0, system_name1, 0, 0)) 129 | return matches 130 | 131 | 132 | def create_naive_matches(convo_id_to_convo, feature=None): 133 | matches = [] 134 | for cid, convo in convo_id_to_convo.items(): 135 | system_name0 = convo['system_type0'] 136 | system_name1 = convo['system_type1'] 137 | for tid, annotations in convo['annotations'].items(): 138 | for annotation in annotations: 139 | if feature is None: 140 | win_score = compute_naive_winner_for_annotation(annotation) 141 | elif feature == 'ssa': 142 | win_score = compute_naive_winner_for_ssa_annotation(annotation) 143 | else: 144 | win_score = compute_naive_winner_for_feature_annotation(annotation, feature) 145 | 146 | if win_score == 1: 147 | matches.append((system_name0, system_name1, 1, 0)) 148 | elif win_score == -1: 149 | matches.append((system_name0, system_name1, 0, 1)) 150 | else: 151 | matches.append((system_name0, system_name1, 0, 0)) 152 | return matches 153 | 154 | 155 | def print_result(convo_id_to_convo, naive=True, feature=None, trueskill=False): 156 | if naive: 157 | matches = create_naive_matches(convo_id_to_convo, feature=feature) 158 | else: 159 | matches = create_set_of_matches(convo_id_to_convo, feature=feature) 160 | print('Draws: ', len([x for x in matches if x[2] == x[3] == 0]) / len(matches)) 161 | rank_range_for_system, scores_for_system, trueskill_scores_for_system = bootstrap_sampling(matches ,convo_id_to_convo, naive=naive, feature=feature, trueskill=trueskill) 162 | for system, rrange in rank_range_for_system.items(): 163 | print('{}\t{}\t{}\t{}\t{}'.format(system, trueskill_scores_for_system[system], scores_for_system[system], 164 | rrange[0] + 1, rrange[1] + 1)) 165 | print('\n\n') 166 | 167 | 168 | if __name__ == "__main__": 169 | convo_id_to_convo = get_all_annotated_convos(ignore_humans=True, apply_blacklist=False) 170 | trueskill = False 171 | naive = True 172 | print('SPOT THE BOT') 173 | print_result(convo_id_to_convo, naive=naive, feature=None, trueskill=trueskill) 174 | print('SSA') 175 | print_result(convo_id_to_convo, naive=naive, feature='ssa', trueskill=trueskill) 176 | print('Fluency') 177 | print_result(convo_id_to_convo, naive=naive, feature='fluencyValue', trueskill=trueskill) 178 | print('Sensibleness') 179 | print_result(convo_id_to_convo, naive=naive, feature='sensitivenessValue', trueskill=trueskill) 180 | print('Specificity') 181 | print_result(convo_id_to_convo, feature='specificityValue', trueskill=trueskill) 182 | -------------------------------------------------------------------------------- /templates/src/segment_analysis/win_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Shows the pairwise winning rates. 
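Win scores use the convention 1 (entity 0 wins), -1 (entity 1 wins) and 0 (draw).
compute_pairwise_wins aggregates one outcome per conversation, whereas
compute_naive_win_rate counts every individual annotation.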
3 | """ 4 | from collections import defaultdict 5 | from templates.src.segment_analysis.annotator_scores import get_all_annotated_convos 6 | 7 | user_black_list = [''] 8 | 9 | 10 | def compute_naive_win_rate(convo_id_to_convo, feature=None): 11 | pairs_to_win_score = defaultdict(lambda: [0, 0]) 12 | for cid, convo in convo_id_to_convo.items(): 13 | system_name0 = convo['system_type0'] 14 | system_name1 = convo['system_type1'] 15 | for tid, annotations in convo['annotations'].items(): 16 | for annotation in annotations: 17 | if feature is None: 18 | win_score = compute_naive_winner_for_annotation(annotation) 19 | elif feature == 'ssa': 20 | win_score = compute_naive_winner_for_ssa_annotation(annotation) 21 | else: 22 | win_score = compute_naive_winner_for_feature_annotation(annotation, feature) 23 | 24 | if win_score == 1: 25 | pairs_to_win_score[system_name0, system_name1][0] += 1 26 | elif win_score == -1: 27 | pairs_to_win_score[system_name1, system_name0][0] += 1 28 | pairs_to_win_score[system_name1, system_name0][1] += 1 29 | pairs_to_win_score[system_name0, system_name1][1] += 1 30 | return pairs_to_win_score 31 | 32 | 33 | def compute_naive_winner_for_annotation(annotation): 34 | score0 = 0 35 | if annotation['entity0_annotation']['is_human'] is True: 36 | score0 = 2 37 | elif annotation['entity0_annotation']['is_human'] is None: 38 | score0 = 1 39 | 40 | score1 = 0 41 | if annotation['entity1_annotation']['is_human'] is True: 42 | score1 = 2 43 | elif annotation['entity1_annotation']['is_human'] is None: 44 | score1 = 1 45 | 46 | if score0 > score1: 47 | return 1 48 | elif score1 > score0: 49 | return -1 50 | else: 51 | return 0 52 | 53 | 54 | def compute_naive_winner_for_feature_annotation(annotation, feature): 55 | human0_pred = annotation['entity0_annotation'][feature] 56 | human1_pred = annotation['entity1_annotation'][feature] 57 | 58 | if human0_pred and not human1_pred: 59 | return 1 60 | elif not human0_pred and human1_pred: 61 | return -1 62 | else: 63 | return 0 64 | 65 | def compute_naive_winner_for_ssa_annotation(annotation): 66 | sensiblenessValue0 = annotation['entity0_annotation']['sensitivenessValue'] 67 | sensiblenessValue1 = annotation['entity1_annotation']['sensitivenessValue'] 68 | 69 | specificityValue0 = annotation['entity0_annotation']['specificityValue'] 70 | specificityValue1 = annotation['entity1_annotation']['specificityValue'] 71 | 72 | if sensiblenessValue0 and specificityValue0 and not sensiblenessValue1 and not specificityValue1: 73 | return 1 74 | elif sensiblenessValue1 and specificityValue1 and not specificityValue0 and not specificityValue0: 75 | return -1 76 | else: 77 | return 0 78 | 79 | def compute_winner_for_annotation(annotations): 80 | ent0_score = 0 81 | ent1_score = 0 82 | 83 | ent0_wins = 0 84 | ent1_wins = 0 85 | 86 | for annotation in annotations: 87 | match_score0, match_score1 = 0, 0 88 | human0_pred = annotation['entity0_annotation']['is_human'] 89 | human1_pred = annotation['entity1_annotation']['is_human'] 90 | 91 | if human0_pred is True: 92 | match_score0 = 2 93 | elif human0_pred is None: 94 | match_score0 = 1 95 | 96 | if human1_pred is True: 97 | match_score1 = 2 98 | elif human1_pred is None: 99 | match_score1 = 1 100 | 101 | ent0_score += match_score0 102 | ent1_score += match_score1 103 | 104 | if match_score0 > match_score1: 105 | ent0_wins += 1 106 | elif match_score1 > match_score0: 107 | ent1_wins += 1 108 | 109 | if ent0_wins > ent1_wins: 110 | return 1 111 | elif ent1_wins > ent0_wins: 112 | return -1 113 | else: 114 
| if ent0_score > ent1_score: 115 | return 1 116 | elif ent1_score > ent0_score: 117 | return -1 118 | else: 119 | return 0 120 | 121 | 122 | def compute_winner_for_feature_annotations(annotations, feature): 123 | ent0_score = 0 124 | ent1_score = 0 125 | 126 | ent0_wins = 0 127 | ent1_wins = 0 128 | 129 | for annotation in annotations: 130 | match_score0, match_score1 = 0, 0 131 | human0_pred = annotation['entity0_annotation'][feature] 132 | human1_pred = annotation['entity1_annotation'][feature] 133 | 134 | if human0_pred and not human1_pred: 135 | match_score0 = 2 136 | elif human0_pred and human1_pred: 137 | match_score0 = 1 138 | match_score1 = 1 139 | elif human1_pred and not human0_pred: 140 | match_score1 = 2 141 | 142 | ent0_score += match_score0 143 | ent1_score += match_score1 144 | 145 | if match_score0 > match_score1: 146 | ent0_wins += 1 147 | elif match_score1 > match_score0: 148 | ent1_wins += 1 149 | 150 | if ent0_wins > ent1_wins: 151 | return 1 152 | elif ent1_wins > ent0_wins: 153 | return -1 154 | else: 155 | if ent0_score > ent1_score: 156 | return 1 157 | elif ent1_score > ent0_score: 158 | return -1 159 | else: 160 | return 0 161 | 162 | 163 | def compute_ssa_winner_annotation(annotations): 164 | ent0_score = 0 165 | ent1_score = 0 166 | 167 | ent0_wins = 0 168 | ent1_wins = 0 169 | 170 | for annotation in annotations: 171 | match_score0, match_score1 = 0, 0 172 | sensiblenessValue0 = annotation['entity0_annotation']['sensitivenessValue'] 173 | sensiblenessValue1 = annotation['entity1_annotation']['sensitivenessValue'] 174 | 175 | specificityValue0 = annotation['entity0_annotation']['specificityValue'] 176 | specificityValue1 = annotation['entity1_annotation']['specificityValue'] 177 | 178 | if sensiblenessValue0 and specificityValue0 and not sensiblenessValue1 and not specificityValue1: 179 | match_score0 = 2 180 | elif sensiblenessValue1 and specificityValue1 and not specificityValue0 and not specificityValue0: 181 | match_score1 = 2 182 | else: 183 | match_score0 = 1 184 | match_score1 = 1 185 | 186 | ent0_score += match_score0 187 | ent1_score += match_score1 188 | 189 | if match_score0 > match_score1: 190 | ent0_wins += 1 191 | elif match_score1 > match_score0: 192 | ent1_wins += 1 193 | 194 | if ent0_wins > ent1_wins: 195 | return 1 196 | elif ent1_wins > ent0_wins: 197 | return -1 198 | else: 199 | if ent0_score > ent1_score: 200 | return 1 201 | elif ent1_score > ent0_score: 202 | return -1 203 | else: 204 | return 0 205 | 206 | 207 | def compute_winner_for_convo(convo, feature=None): 208 | ent0_wins = 0 209 | ent1_wins = 0 210 | for tid, annotations in convo['annotations'].items(): 211 | if feature is None: 212 | win_id = compute_winner_for_annotation(annotations) 213 | elif feature == 'ssa': 214 | win_id = compute_ssa_winner_annotation(annotations) 215 | else: 216 | win_id = compute_winner_for_feature_annotations(annotations, feature) 217 | if win_id == 1: 218 | ent0_wins += tid 219 | elif win_id == -1: 220 | ent1_wins += tid 221 | 222 | if ent0_wins > ent1_wins: 223 | return 1 224 | elif ent1_wins > ent0_wins: 225 | return -1 226 | else: 227 | return 0 228 | 229 | 230 | def compute_pairwise_wins(convo_id_to_convo, feature=None): 231 | pairs_to_win_score = defaultdict(lambda: [0, 0]) 232 | for cid, convo in convo_id_to_convo.items(): 233 | system_name0 = convo['system_type0'] 234 | system_name1 = convo['system_type1'] 235 | 236 | if system_name0 == 'human' or system_name1 == 'human': 237 | continue 238 | 239 | win_score = compute_winner_for_convo(convo, 
feature=feature) 240 | if win_score == 1: 241 | pairs_to_win_score[system_name0, system_name1][0] += 1 242 | 243 | elif win_score == -1: 244 | pairs_to_win_score[system_name1, system_name0][0] += 1 245 | pairs_to_win_score[system_name1, system_name0][1] += 1 246 | pairs_to_win_score[system_name0, system_name1][1] += 1 247 | return pairs_to_win_score 248 | 249 | 250 | def print_result(convo_id_to_convo, naive=False, feature=None): 251 | if not naive: 252 | pairs_to_win_score = compute_pairwise_wins(convo_id_to_convo, feature=feature) 253 | else: 254 | pairs_to_win_score = compute_naive_win_rate(convo_id_to_convo, feature=feature) 255 | system_names = sorted(list(set([x[0] for x in pairs_to_win_score.keys()]))) 256 | print('\t'.join([''] + system_names)) 257 | for system_name0 in system_names: 258 | oline = system_name0 259 | for system_name1 in system_names: 260 | if system_name0 == system_name1: 261 | oline += '\t' 262 | else: 263 | nwins, nann = pairs_to_win_score[system_name0, system_name1] 264 | nlosses, _ = pairs_to_win_score[system_name1, system_name0] 265 | if nlosses + nwins > 0: 266 | oline += '\t{:10.2f}'.format(nwins / (nlosses + nwins)) 267 | else: 268 | oline += '\t{}'.format('-') 269 | print(oline) 270 | print('\n\n') 271 | 272 | if __name__ == "__main__": 273 | # user_black_list = create_black_list() 274 | convo_id_to_convo = get_all_annotated_convos(ignore_humans=True, apply_blacklist=False) 275 | 276 | naive = True 277 | print_result(convo_id_to_convo, naive, None) 278 | print_result(convo_id_to_convo, naive, 'ssa') 279 | print_result(convo_id_to_convo, naive, 'fluencyValue') 280 | print_result(convo_id_to_convo, naive, 'sensitivenessValue') 281 | print_result(convo_id_to_convo, naive, 'specificityValue') 282 | -------------------------------------------------------------------------------- /templates/src/segment_analysis/win_significance.py: -------------------------------------------------------------------------------- 1 | from templates.src.segment_analysis.win_function import compute_pairwise_wins, compute_naive_win_rate 2 | from templates.src.segment_analysis.annotator_scores import get_all_annotated_convos 3 | from scipy.stats import binom_test, chi2_contingency 4 | import itertools 5 | from collections import defaultdict 6 | import random 7 | import numpy as np 8 | 9 | def create_random_annotations(convo_id_to_convo): 10 | random_cid_to_convo = {} 11 | pairs_to_win_score = compute_pairwise_wins(convo_id_to_convo, feature=None) 12 | system_names = sorted(list(set([x[0] for x in pairs_to_win_score.keys()]))) 13 | cid = 0 14 | for i in range(10000): 15 | for system_name in system_names: 16 | for system_name_other in set(system_names).difference([system_name]): 17 | convo = {} 18 | convo['system_type0'] = system_name 19 | convo['system_type1'] = system_name_other 20 | tid_to_annotation = {} 21 | for tid in [2,3, 5]: 22 | annotations = [] 23 | for _ in range(2): 24 | annotation = {} 25 | annotation['entity0_annotation'] = {} 26 | annotation['entity1_annotation'] = {} 27 | annotation['entity0_annotation']['is_human'] = random.choice([True, False, None]) 28 | annotation['entity1_annotation']['is_human'] = random.choice([True, False, None]) 29 | annotations.append(annotation) 30 | tid_to_annotation[tid] = annotations 31 | convo['annotations'] = tid_to_annotation 32 | random_cid_to_convo[cid] = convo 33 | cid += 1 34 | 35 | return random_cid_to_convo 36 | 37 | 38 | def compute_significane_rate(convo_id_to_convo, naive=False, feature=None, verbose=False): 39 | if not naive: 
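        # pairwise mode: one win/loss/draw per conversation; the naive branch below
        # instead counts every single annotation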
40 |         pairs_to_win_score = compute_pairwise_wins(convo_id_to_convo, feature=feature)
41 |     else:
42 |         pairs_to_win_score = compute_naive_win_rate(convo_id_to_convo, feature=feature)
43 |     total_significance = 0
44 |     n_pairs = 0
45 |     system_names = sorted(list(set([x[0] for x in pairs_to_win_score.keys()])))
46 | 
47 |     for system_name, system_name_other in itertools.combinations(system_names, r=2):
48 |         wins = pairs_to_win_score[system_name, system_name_other][0]
49 |         losses = pairs_to_win_score[system_name_other, system_name][0]
50 |         total = pairs_to_win_score[system_name, system_name_other][1]
51 |         tie = total - wins - losses
52 | 
53 |         contingency_table = np.array([[wins, losses], [total - wins, total - losses]])
54 | 
55 |         #pval_win = binom_test(x=[tie, total-tie], p= 1, alternative='less')
56 |         #pval_loss = binom_test(x=[losses, losses + wins], p= 1/ 2, alternative='two-sided')
57 |         chi2, p, dof, expected = chi2_contingency(contingency_table)
58 |         if verbose:
59 |             print(system_name, system_name_other, p)
60 | 
61 |         total_significance += int(p < 0.05)
62 |         n_pairs += 1
63 | 
64 |     return total_significance / n_pairs
65 | 
66 | 
67 | def get_system_names_from_dict(convo_id_to_convo):
68 |     system_names = set()
69 |     for cid, convo in convo_id_to_convo.items():
70 |         system_names.add(convo['system_type0'])
71 |         system_names.add(convo['system_type1'])
72 | 
73 |     return list(system_names)
74 | 
75 | 
76 | def reduced_dict(convo_id_to_convo, system_names):
77 |     reduced_cid_to_convo = {}
78 |     for cid, convo in convo_id_to_convo.items():
79 |         if convo['system_type0'] in system_names and convo['system_type1'] in system_names:
80 |             reduced_cid_to_convo[cid] = convo
81 |     return reduced_cid_to_convo
82 | 
83 | 
84 | def sample_convos_for_bot_pair(convo_id_to_convo, sij):
85 |     pair_to_convos = defaultdict(lambda : [])
86 |     for cid, convo in convo_id_to_convo.items():
87 |         system_type0 = convo['system_type0']
88 |         system_type1 = convo['system_type1']
89 |         if pair_to_convos.get((system_type0, system_type1)) is None and pair_to_convos.get((system_type1, system_type0)) is None:
90 |             pair_to_convos[system_type0, system_type1].append(convo)
91 |         elif pair_to_convos.get((system_type0, system_type1)) is not None:
92 |             pair_to_convos[system_type0, system_type1].append(convo)
93 |         elif pair_to_convos.get((system_type1, system_type0)) is not None:
94 |             pair_to_convos[system_type1, system_type0].append(convo)
95 | 
96 |     #sample sij convos for each pair
97 |     sampled_convo_id_to_convo = {}
98 |     for system_pair, convos in pair_to_convos.items():
99 |         sampled_convos = random.sample(convos, k=sij)
100 |         for convo in sampled_convos:
101 |             sampled_convo_id_to_convo[str(convo['_id'])] = convo
102 | 
103 |     return sampled_convo_id_to_convo
104 | 
105 | 
106 | 
107 | def average_significance_for_reduced(convo_id_to_convo, sij, feature=None, naive=False):
108 |     repetitions = 1000
109 |     total_rate = 0
110 |     for _ in range(repetitions):
111 |         sampled_convo_id_to_convo = sample_convos_for_bot_pair(convo_id_to_convo, sij)
112 |         total_rate += compute_significane_rate(sampled_convo_id_to_convo, feature=feature, naive=naive)
113 |     return total_rate/repetitions
114 | 
115 | def average_significance_rate(convo_id_to_convo, b, sij, feature=None, naive=False):
116 |     system_names = get_system_names_from_dict(convo_id_to_convo)
117 | 
118 |     total_rate = 0
119 |     combos = list(itertools.combinations(system_names, b))
120 |     for system_subset in combos:
121 |         reduced_convo_id_to_convo = reduced_dict(convo_id_to_convo, set(system_subset))
122 |         total_rate +=
average_significance_for_reduced(reduced_convo_id_to_convo, sij, naive=naive, feature=feature) 123 | 124 | return total_rate/len(combos) 125 | 126 | if __name__ == "__main__": 127 | #user_black_list = create_black_list() 128 | feature='ssa' 129 | convo_id_to_convo = get_all_annotated_convos(ignore_humans=True) 130 | naive = False 131 | print(compute_significane_rate(convo_id_to_convo, feature=feature, naive=naive, verbose=True)) 132 | 133 | pairs_to_win_score = compute_naive_win_rate(convo_id_to_convo, feature=feature,) 134 | system_names = sorted(list(set([x[0] for x in pairs_to_win_score.keys()]))) 135 | print('\t'.join([''] + system_names)) 136 | for system_name0 in system_names: 137 | oline = system_name0 138 | for system_name1 in system_names: 139 | if system_name0 == system_name1: 140 | oline += '\t' 141 | else: 142 | nwins, nann = pairs_to_win_score[system_name0, system_name1] 143 | nlosses, nann = pairs_to_win_score[system_name1, system_name0] 144 | if nann > 0: 145 | oline += '\t{}'.format(nwins / (nlosses +nwins)) 146 | #oline += '\t{}/{}'.format(nwins, (nlosses +nwins)) 147 | #oline += '\t{}/{}'.format(nwins, nann) 148 | else: 149 | oline += '\t{}'.format('-') 150 | print(oline) 151 | print('\n\n') 152 | 153 | for sij in range(10, 45): 154 | rate = average_significance_rate(convo_id_to_convo, b=4, sij=sij, naive=naive, feature=feature) 155 | print('{}\t{}'.format(sij, rate)) -------------------------------------------------------------------------------- /templates/src/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | _config = None 5 | 6 | def load_config(): 7 | global _config 8 | if _config is not None: 9 | return _config 10 | else: 11 | with open('config/annotation_app.json', 'rt', encoding='utf-8') as conf_file: 12 | _config = json.load(conf_file) 13 | return _config -------------------------------------------------------------------------------- /templates/src/views.py: -------------------------------------------------------------------------------- 1 | from flask import render_template, Blueprint, request 2 | hello_blueprint = Blueprint('hello',__name__) 3 | 4 | @hello_blueprint.route('/') 5 | @hello_blueprint.route('/hello') 6 | def index(): 7 | return render_template("index.html", convo_id=0, pkg_id=0, show_leaderboard=False, full_convo=False, segmented=False) 8 | 9 | @hello_blueprint.route('/rd') 10 | def conditional_index(): 11 | convo_id = request.args.get('id', 0) 12 | return render_template("index.html", convo_id=convo_id, pkg_id=0, show_leaderboard=False, full_convo=False, segmented=False) 13 | 14 | @hello_blueprint.route('/rdfull') 15 | def conditional_index_full(): 16 | convo_id = request.args.get('id', 0) 17 | return render_template("index.html", convo_id=convo_id, pkg_id=0, show_leaderboard=False, full_convo=True, segmented=False) 18 | 19 | @hello_blueprint.route('/pkg') 20 | def packaged_index(): 21 | pkg_id = request.args.get('id', 0) 22 | return render_template("index.html", convo_id=0, pkg_id=pkg_id, show_leaderboard=False, full_convo=True, segmented=False) 23 | 24 | 25 | @hello_blueprint.route('/pkgsg') 26 | def packaged_segmented_index(): 27 | pkg_id = request.args.get('id', 0) 28 | return render_template("index.html", convo_id=0, pkg_id=pkg_id, show_leaderboard=False, full_convo=True, segmented=True) 29 | 30 | @hello_blueprint.route('/leaderboard') 31 | def leaderboard(): 32 | return render_template("index.html", convo_id=0, pkg_id=0, show_leaderboard=True, full_convo=False, 
segmented=False) -------------------------------------------------------------------------------- /templates/static/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["react", "es2015"] 3 | } 4 | -------------------------------------------------------------------------------- /templates/static/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/static/__init__.py -------------------------------------------------------------------------------- /templates/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Spot the Bot 11 | 18 | 19 | 20 | 21 | 37 |
38 |
39 |
40 |
41 |
42 |
43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /templates/static/js/api_client.jsx: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | 3 | const DIALOGUE_SYSTEM_PATH = '/api/dialouge_systems'; 4 | const DIALOGUE_DOMAINS = '/api/dialouge_domains'; 5 | const RANDOM_DIALOGUE_PATH = '/api/random_dialogue'; 6 | const LIST_OF_DIALOGUES = '/api/list_of_dialogues'; 7 | const DIALOGUE_FOR_ID = '/api/get_dialogue_for_id'; 8 | 9 | const POST_DECISION = '/api/post_decision'; 10 | const LEADERBOARD_PATH = '/api/get_leaderboard'; 11 | const SCORE_FOR_USER_PATH = '/api/get_score_for_user'; 12 | const PACKAGE_PATH = '/api/get_package_for_id'; 13 | const PACKAGE_FOR_USER = '/api/get_number_of_packages_for_user'; 14 | 15 | 16 | export default class ApiClient { 17 | 18 | getDialogueSystems() { 19 | return axios.get(DIALOGUE_SYSTEM_PATH, {}); 20 | } 21 | 22 | getDialogueDomains() { 23 | return axios.get(DIALOGUE_DOMAINS, {}); 24 | } 25 | 26 | getRandomDialogue(_dialogue_domains) { 27 | return axios.get(RANDOM_DIALOGUE_PATH, {params: {dialogue_domains: _dialogue_domains}}); 28 | } 29 | 30 | getListOfDialoguesForSystem(_dialogue_system) { 31 | return axios.get(LIST_OF_DIALOGUES, {params: {dialogue_system: _dialogue_system}}); 32 | } 33 | 34 | getListOfDialoguesForDomain(_domain) { 35 | return axios.get(LIST_OF_DIALOGUES, {params: {domain: _domain}}); 36 | } 37 | 38 | getDialogueForID(_dialogue_id) { 39 | return axios.get(DIALOGUE_FOR_ID, {params: {dialogue_id: _dialogue_id}}); 40 | } 41 | 42 | postDecisionForDialogue(data){ 43 | return axios.post(POST_DECISION, data) 44 | } 45 | 46 | getLeaderboard(){ 47 | return axios.get(LEADERBOARD_PATH, {}) 48 | } 49 | 50 | get_score_for_user(user_name){ 51 | return axios.get(SCORE_FOR_USER_PATH, {params: {user_name: user_name}}) 52 | } 53 | 54 | getPackageForID(_package_id){ 55 | return axios.get(PACKAGE_PATH, {params: {package_id: _package_id}}); 56 | } 57 | getNumberOfPackagesForUser(_user_name){ 58 | return axios.get(PACKAGE_FOR_USER, {params: {user_name: _user_name}}); 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /templates/static/js/components/ContinuousSlider.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { makeStyles } from '@material-ui/core/styles'; 3 | import Grid from '@material-ui/core/Grid'; 4 | import Typography from '@material-ui/core/Typography'; 5 | import Slider from '@material-ui/core/Slider'; 6 | import VolumeDown from '@material-ui/icons/VolumeDown'; 7 | import VolumeUp from '@material-ui/icons/VolumeUp'; 8 | 9 | const useStyles = makeStyles({ 10 | root: { 11 | width: 200, 12 | }, 13 | }); 14 | 15 | export default function ContinuousSlider() { 16 | const classes = useStyles(); 17 | const [value, setValue] = React.useState(30); 18 | 19 | const handleChange = (event, newValue) => { 20 | setValue(newValue); 21 | }; 22 | 23 | return ( 24 |
25 | 26 | Volume 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | ); 42 | } -------------------------------------------------------------------------------- /templates/static/js/components/Dialogue.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import ApiClient from '../api_client'; 3 | import DialogueTurn from "./DialogueTurn"; 4 | import {animateScroll} from "react-scroll"; 5 | 6 | export default class Dialogue extends Component { 7 | constructor(props) { 8 | super(props); 9 | this.state = { 10 | current_dialogue: null, 11 | value: [], 12 | current_dialogue_length: -1 13 | 14 | }; 15 | this.onChildNextClick = this.onChildNextClick.bind(this); 16 | this.onChildDecideClick = this.onChildDecideClick.bind(this); 17 | } 18 | 19 | scrollToBottom() { 20 | animateScroll.scrollToBottom({ 21 | containedId: "content" 22 | }); 23 | } 24 | 25 | componentDidMount() { 26 | if (this.props.dialogue_id !== null && this.props.dialogue_id !== undefined) { 27 | this.loadDialogueForID(); 28 | this.setState({current_turn: 2}); //reset the to the first turns 29 | } 30 | } 31 | 32 | componentDidUpdate(prevProps, prevState, snapshot) { 33 | if (this.props.dialogue_id !== prevProps.dialogue_id || this.props.dialogue_domains !== prevProps.dialogue_domains) { 34 | if (this.props.dialogue_id !== null && this.props.dialogue_id !== undefined) { 35 | this.loadDialogueForID(); 36 | this.setState({current_turn: 2}); //reset the to the first turns 37 | } 38 | } 39 | this.scrollToBottom(); 40 | } 41 | 42 | loadDialogueForID() { 43 | const apiClient = new ApiClient(); 44 | 45 | apiClient.getDialogueForID(this.props.dialogue_id).then(res => { 46 | let dialogue_len = res.data.convo.length; 47 | this.props.dialogue_loaded_cb(res.data.start_time, dialogue_len, res.data.is_human0, res.data.is_human1); 48 | this.setState({current_dialogue: res.data, current_dialogue_length: dialogue_len}); 49 | }); 50 | } 51 | 52 | loadRandomDialogue() { 53 | const apiClient = new ApiClient(); 54 | 55 | apiClient.getRandomDialogue(this.props.dialogue_domains).then(res => { 56 | let dialogue_len = res.data.convo.length; 57 | this.setState({current_dialogue: res.data, current_dialogue_length: dialogue_len}); 58 | }); 59 | } 60 | 61 | onChildNextClick() { 62 | this.props.nextCallback(this.state.current_dialogue_length); 63 | } 64 | 65 | onChildDecideClick(entity_number) { 66 | this.props.decisionCallback(this.state.current_dialogue.start_time, entity_number); 67 | } 68 | 69 | //(key === current_dialogue.length - 1 && !this.props.decided0) || 70 | //(key === current_dialogue.length - 1 && !this.props.decided1) || 71 | render() { 72 | let current_dialogue = this.state.current_dialogue; 73 | 74 | if (this.state.current_dialogue != null) { 75 | let current_dialogue = this.state.current_dialogue.convo; 76 | console.log(current_dialogue.length); 77 | let turns = current_dialogue.map((item, key) => 78 | 89 | ); 90 | 91 | return ( 92 | turns 93 | ) 94 | } else 95 | return ( 96 |

No Dialogue Loaded

97 | ) 98 | 99 | 100 | } 101 | 102 | } -------------------------------------------------------------------------------- /templates/static/js/components/DialogueTurn.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | 3 | export default class DialogueTurn extends Component { 4 | constructor(props) { 5 | super(props); 6 | } 7 | 8 | render() { 9 | 10 | let image_src = ''; 11 | if (this.props.bot_turn === 0) { 12 | image_src = 'public/img/bot_blue.jpg'; 13 | } else { 14 | image_src = 'public/img/bot_red.jpg' 15 | } 16 | 17 | let active_class = 'bot_active_turn'; 18 | if (!this.props.active){ 19 | active_class = 'bot_inactive_turn' 20 | } 21 | 22 | let display_next = 'is_not_last_turn'; 23 | if (this.props.display_next){ 24 | display_next = 'is_last_turn' 25 | } 26 | 27 | let display_decision0 = 'is_not_dialogue_end'; 28 | if (this.props.display_decision0){ 29 | display_decision0 = 'is_dialogue_end' 30 | } 31 | let display_decision1 = 'is_not_dialogue_end'; 32 | if (this.props.display_decision1){ 33 | display_decision1 = 'is_dialogue_end' 34 | } 35 | 36 | return ( 37 |
38 |
39 |
40 | 41 |
42 |
43 |

{"Entity: " + this.props.bot_turn}

44 |

{this.props.text}

45 |
46 |
47 |
48 | 49 | 50 | 51 |
52 |
53 | ) 54 | } 55 | } -------------------------------------------------------------------------------- /templates/static/js/components/DialougeContainer.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import Dialogue from "./Dialogue"; 3 | import SubmittedForm from "./RandomCode"; 4 | import SingleAnnotationForm from "./SingleAnnotationForm"; 5 | import {TextField} from "@material-ui/core"; 6 | import ApiClient from "../api_client"; 7 | 8 | const start_turn = 0; 9 | const next_turns = 1; 10 | 11 | 12 | export default class DialogueContainer extends Component { 13 | constructor(props) { 14 | super(props); 15 | this.state = { 16 | dialogue_id: this.props.convo_id, 17 | initial_turn: this.props.start_turn, 18 | deciding0: false, 19 | deciding1: false, 20 | decided0: false, 21 | decided1: false, 22 | decision_turn0: -1, 23 | decision_turn1: -1, 24 | rating_provided: false, 25 | current_turn: this.props.start_turn, 26 | start_time: -1, 27 | current_dialogue_length: -1, 28 | random_code: -1, 29 | final_score: -1, 30 | turn_penalty: -1, 31 | user_name: localStorage.getItem("user_name") ? localStorage.getItem("user_name") : "" 32 | }; 33 | this.dialogueDivRef = React.createRef(); 34 | this.dialogueIdList = React.createRef(); 35 | 36 | this.handleDecision = this.handleDecision.bind(this); 37 | this.handleNext = this.handleNext.bind(this); 38 | this.handleKeyEvent = this.handleKeyEvent.bind(this); 39 | this.dialogue_loaded_callback = this.dialogue_loaded_callback.bind(this); 40 | this.handleSubmission = this.handleSubmission.bind(this); 41 | this.handleEntityDecision = this.handleEntityDecision.bind(this); 42 | this.handleUserName = this.handleUserName.bind(this); 43 | this.handleUserNameFocus = this.handleUserNameFocus.bind(this); 44 | this.handleFinalRating = this.handleFinalRating.bind(this); 45 | this.getUserScore = this.getUserScore.bind(this); 46 | 47 | } 48 | 49 | componentDidMount() { 50 | document.addEventListener("keyup", this.handleKeyEvent); 51 | } 52 | 53 | handleKeyEvent(event) { 54 | if (event.key === 'ArrowDown' && !(this.state.decided0 && this.state.decided1) && !(this.state.deciding0 || this.state.deciding1)) { 55 | this.handleNext(); 56 | } 57 | } 58 | 59 | dialogue_loaded_callback(start_time, current_dialogue_length, is_human0, is_human1) { 60 | if (this.state.current_turn === -1) { 61 | this.setState({ 62 | current_dialogue_length: current_dialogue_length, 63 | start_time: start_time, 64 | current_turn: current_dialogue_length, 65 | is_human0: is_human0, 66 | is_human1: is_human1 67 | }); 68 | } else { 69 | this.setState({ 70 | current_dialogue_length: current_dialogue_length, 71 | start_time: start_time, 72 | is_human0: is_human0, 73 | is_human1: is_human1 74 | }); 75 | } 76 | } 77 | 78 | handleDecision(start_time, entity_number) { 79 | if (this.state.user_name === null || this.state.user_name === '') { 80 | alert('Please Enter a Valid User Name'); 81 | } else { 82 | if (entity_number === 0) { 83 | this.setState({deciding0: true, start_time: start_time}) 84 | } else { 85 | this.setState({deciding1: true, start_time: start_time}) 86 | } 87 | } 88 | } 89 | 90 | getUserScore() { 91 | 92 | } 93 | 94 | 95 | handleNext() { 96 | if (this.state.user_name === null || this.state.user_name === '') { 97 | alert('Please Enter a Valid User Name'); 98 | } else { 99 | if (this.state.current_turn < this.state.current_dialogue_length) { 100 | let new_turn = this.state.current_turn + next_turns; 101 | 
this.setState({current_turn: new_turn}) 102 | } 103 | 104 | } 105 | } 106 | 107 | handleFinalRating(is_human0, is_human1) { 108 | this.setState({rating_provided: true, is_human0_pred: is_human0, is_human1_pred: is_human1}) 109 | } 110 | 111 | handleSubmission(random_submission_code) { 112 | const apiClient = new ApiClient(); 113 | 114 | apiClient.get_score_for_user(this.state.user_name).then(res => { 115 | this.setState({ 116 | final_score: res.data.final_score, 117 | turn_penalty: res.data.avg_turn_penalty, 118 | random_code: random_submission_code, 119 | }); 120 | } 121 | ); 122 | } 123 | 124 | handleEntityDecision(entity_number) { 125 | if (entity_number === 0) { 126 | this.setState({decided0: true, deciding0: false, decision_turn0: this.state.current_turn}); 127 | } else { 128 | this.setState({decided1: true, deciding1: false, decision_turn1: this.state.current_turn}); 129 | } 130 | } 131 | 132 | handleUserName(event, newValue) { 133 | console.log("new value", event.target.value); 134 | this.setState({user_name: event.target.value}); 135 | localStorage.setItem('user_name', this.state.user_name); 136 | } 137 | 138 | handleUserNameFocus() { 139 | this.setState({user_name: ""}); 140 | localStorage.setItem('user_name', this.state.user_name); 141 | } 142 | 143 | render() { 144 | let formClass = ''; 145 | if (!(this.state.decided0 && this.state.decided1 && this.state.rating_provided)) { 146 | formClass = 'submission_div' 147 | } 148 | 149 | let penalty_class = ''; 150 | if (this.state.current_turn / this.state.current_dialogue_length < 0.3) { 151 | penalty_class = 'root_green' 152 | } else if (this.state.current_turn / this.state.current_dialogue_length < 0.5) { 153 | penalty_class = 'root_yellow_green' 154 | } else if (this.state.current_turn / this.state.current_dialogue_length < 0.7) { 155 | penalty_class = 'root_yellow' 156 | } else if (this.state.current_turn / this.state.current_dialogue_length < 0.9) { 157 | penalty_class = 'root_red' 158 | } else { 159 | penalty_class = 'root_darkred' 160 | } 161 | 162 | return ( 163 |
164 |
165 |
166 | 174 |
175 |
176 | 183 |
184 |
185 |
186 |
187 | 198 |
199 |
200 | 201 | 202 |
203 |
204 | 219 |
220 |
221 |
222 |
223 | 236 |
237 |
238 |
239 | ) 240 | 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /templates/static/js/components/DialougeDomainFilter.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import ApiClient from '../api_client'; 3 | import DialogueList from "./DialougeList"; 4 | 5 | export default class DialougeSystemFilter extends Component { 6 | constructor(props) { 7 | super(props); 8 | this.state = { 9 | dialogue_domains: [], dialogue_domain_value: null 10 | }; 11 | this.handleChange = this.handleChange.bind(this); 12 | } 13 | 14 | componentDidMount() { 15 | this.loadDialogueDomains(); 16 | } 17 | 18 | loadDialogueDomains() { 19 | const apiClient = new ApiClient(); 20 | 21 | apiClient.getDialogueDomains().then(res => { 22 | this.setState({dialogue_domains: res.data}); 23 | }); 24 | } 25 | 26 | handleChange(event) { 27 | this.setState({dialogue_domain_value: event.target.value}); 28 | } 29 | 30 | render() { 31 | let dialogue_systems = this.state.dialogue_domains; 32 | 33 | let options = dialogue_systems.map((data) => 34 | 40 | ); 41 | 42 | return ( 43 |
44 |
45 | 49 |
50 |
51 | 52 |
53 |
54 | 55 | 56 | ) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /templates/static/js/components/DialougeSystemFilter.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import ApiClient from '../api_client'; 3 | import DialogueList from "./DialougeList"; 4 | 5 | export default class DialougeSystemFilter extends Component { 6 | constructor(props) { 7 | super(props); 8 | this.state = { 9 | dialogue_systems: [], dialogue_system_value: null}; 10 | this.handleChange = this.handleChange.bind(this); 11 | } 12 | 13 | componentDidMount() { 14 | this.loadDialogueSystems(); 15 | } 16 | 17 | loadDialogueSystems() { 18 | const apiClient = new ApiClient(); 19 | 20 | apiClient.getDialogueSystems().then(res => { 21 | this.setState({dialogue_systems: res.data}); 22 | }); 23 | } 24 | 25 | handleChange(event) { 26 | this.setState({value: event.target.value}); 27 | } 28 | 29 | render() { 30 | let dialogue_systems = this.state.dialogue_systems; 31 | 32 | let options = dialogue_systems.map((data) => 33 | 39 | ); 40 | 41 | return ( 42 |
43 | 47 | 48 |
49 | 50 | ) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /templates/static/js/components/EntityForm.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from "react"; 2 | import Typography from "@material-ui/core/Typography"; 3 | import Button from "@material-ui/core/Button"; 4 | 5 | const defaultValue = 50; 6 | 7 | export default class EntityForm extends Component { 8 | constructor(props) { 9 | super(props); 10 | this.state = { 11 | defaultValue: 50, 12 | comment0: "", 13 | comment1: "", 14 | bot_value: "none" 15 | }; 16 | 17 | this.fluencyValueRef = React.createRef(); 18 | this.sensitivenessValueRef = React.createRef(); 19 | this.specificityValueRef = React.createRef(); 20 | this.comment0Ref = React.createRef(); 21 | this.checked0 = false; 22 | 23 | this.handleRadioChange = this.handleRadioChange.bind(this); 24 | this.handleSubmit = this.handleSubmit.bind(this); 25 | 26 | this.handleHumanButton = this.handleHumanButton.bind(this); 27 | this.handleBotButton = this.handleBotButton.bind(this); 28 | this.handleUnsureButton = this.handleUnsureButton.bind(this); 29 | } 30 | 31 | componentDidMount() { 32 | this.setState({bot_value: 'none'}) 33 | } 34 | 35 | handleRadioChange(event, newValue) { 36 | this.setState({bot_value: newValue}); 37 | } 38 | 39 | handleSubmit() { 40 | if (this.state.bot_value === 'none') { 41 | alert('Please Decide if the Entity is a Bot or Human'); 42 | } else { 43 | let data = {}; 44 | if(this.state.bot_value !== 'unsure'){ 45 | data.is_human = this.state.bot_value === 'true'; 46 | }else{ 47 | data.is_human = null; 48 | } 49 | data.decision_turn = this.props.current_turn + 1; 50 | data.entity_number = this.props.entity_number; 51 | this.props.submissionCallback(data); 52 | this.setState({bot_value: 'none'}) 53 | } 54 | 55 | 56 | } 57 | 58 | handleHumanButton() { 59 | this.setState({bot_value: 'true'}) 60 | } 61 | 62 | handleBotButton() { 63 | this.setState({bot_value: 'false'}) 64 | } 65 | 66 | handleUnsureButton() { 67 | this.setState({bot_value: 'unsure'}) 68 | } 69 | 70 | render() { 71 | let submission_cls1 = 'submission_div'; 72 | if (this.props.no_spot_the_bot) { 73 | submission_cls1 = '' 74 | } 75 | 76 | let human_variant = 'outlined'; 77 | let bot_variant = 'outlined'; 78 | let unsure_variant = 'outlined'; 79 | if (this.state.bot_value === 'true') { 80 | human_variant = 'contained'; 81 | } else if (this.state.bot_value === 'false') { 82 | bot_variant = 'contained'; 83 | } else if (this.state.bot_value === 'unsure') { 84 | unsure_variant = 'contained'; 85 | } 86 | 87 | if (this.props.segmented) { 88 | return ( 89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 | 97 | Is Entity {this.props.entity_number} 98 |
99 | 100 |
101 | 105 | 109 | 114 |
115 |
116 |
117 | 118 |
119 | 123 |
124 |
125 |
126 |
127 |
128 | ); 129 | }else{ 130 | return ( 131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 | 139 | Is Entity {this.props.entity_number} 140 |
141 | 142 |
143 | 147 | 151 |
152 |
153 |
154 | 155 |
156 | 160 |
161 |
162 |
163 |
164 |
165 | ); 166 | } 167 | 168 | } 169 | } -------------------------------------------------------------------------------- /templates/static/js/components/FinalRatingForm.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import Button from "@material-ui/core/Button"; 3 | import Slider from "@material-ui/core/Slider"; 4 | import Typography from "@material-ui/core/Typography"; 5 | import Checkbox from "@material-ui/core/Checkbox"; 6 | import Input from "@material-ui/core/Input"; 7 | import ApiClient from "../api_client"; 8 | import {TextField} from "@material-ui/core"; 9 | import withStyles from "@material-ui/core/styles/withStyles"; 10 | 11 | const defaultValue = 50; 12 | 13 | const StyledButton = withStyles({ 14 | root: { 15 | margin: '0 15px 10px 0', 16 | }, 17 | label: { 18 | textTransform: 'capitalize', 19 | } 20 | })(Button); 21 | 22 | export default class AnnotationForm extends Component { 23 | constructor(props) { 24 | super(props); 25 | this.state = { 26 | fluencyWinner: null, 27 | sensitivenessWinner: null, 28 | specificityWinner: null 29 | }; 30 | 31 | this.handleSubmit = this.handleSubmit.bind(this); 32 | } 33 | 34 | 35 | handleSubmit() { 36 | const apiClient = new ApiClient(); 37 | const min = 1000000000; 38 | const max = 9999999999; 39 | 40 | let random_number = Math.round(min + Math.random() * (max - min)); 41 | let user_name = localStorage.getItem("user_name") ? localStorage.getItem("user_name") : "" 42 | 43 | if (this.state.fluencyWinner === null || this.state.sensitivenessWinner === null || this.state.specificityWinner === null) { 44 | alert('Please decide which entity performs better'); 45 | } else if (user_name === "") { 46 | alert('Please Enter your AMT WorkerID'); 47 | } else { 48 | let data = {}; 49 | data.entity0_annotation = {}; 50 | data.entity1_annotation = {}; 51 | data.entity0_annotation.fluencyValue = this.state.fluencyWinner === 'entity0' || this.state.fluencyWinner === 'tie'; 52 | data.entity1_annotation.fluencyValue = this.state.fluencyWinner === 'entity1' || this.state.fluencyWinner === 'tie'; 53 | 54 | data.entity0_annotation.sensitivenessValue = this.state.sensitivenessWinner === 'entity0' || this.state.sensitivenessWinner === 'tie'; 55 | data.entity1_annotation.sensitivenessValue = this.state.sensitivenessWinner === 'entity1' || this.state.sensitivenessWinner === 'tie'; 56 | 57 | data.entity0_annotation.specificityValue = this.state.specificityWinner === 'entity0' || this.state.specificityWinner === 'tie'; 58 | data.entity1_annotation.specificityValue = this.state.specificityWinner === 'entity1' || this.state.specificityWinner === 'tie'; 59 | 60 | data.random_number = random_number; 61 | 62 | this.props.submissionCallback(data); 63 | } 64 | 65 | 66 | } 67 | 68 | render() { 69 | 70 | let variant_dict = { 71 | true: 'contained', 72 | false: 'outlined' 73 | }; 74 | 75 | let selected_variant = 'contained'; 76 | let unselected_variant = 'outlined'; 77 | 78 | return ( 79 |
80 |
81 |
82 |
83 |
84 | Who performed better in: 85 |
86 | 87 |
88 | Fluency: 89 | this.setState({fluencyWinner: 'entity0'})} 92 | > 93 | Entity 0 94 | this.setState({fluencyWinner: 'entity1'})} 97 | > 98 | Entity 1 99 | this.setState({fluencyWinner: 'tie'})} 102 | > 103 | No Difference 104 |
105 | 106 |
107 | Sensibleness: 108 | this.setState({sensitivenessWinner: 'entity0'})} 111 | > 112 | Entity 0 113 | this.setState({sensitivenessWinner: 'entity1'})} 116 | > 117 | Entity 1 118 | this.setState({sensitivenessWinner: 'tie'})} 121 | > 122 | No Difference 123 |
124 | 125 | 126 |
127 | Specificity: 128 | this.setState({specificityWinner: 'entity0'})}> 131 | Entity 0 132 | this.setState({specificityWinner: 'entity1'})}> 135 | Entity 1 136 | this.setState({specificityWinner: 'tie'})}> 139 | No Difference 140 |
141 | 142 |
143 | 147 |
148 |
149 | 150 |
151 |
152 |
153 | ); 154 | } 155 | 156 | } -------------------------------------------------------------------------------- /templates/static/js/components/Instructions.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | 3 | export default class Home extends Component { 4 | constructor(props) { 5 | super(props); 6 | this.state = { 7 | show_instructions: true 8 | } 9 | } 10 | 11 | 12 | render() { 13 | let cls = ''; 14 | if (!this.state.show_instructions) { 15 | cls = 'instruction-text-hidden' 16 | } 17 | 18 | return ( 19 |
20 |
21 |

Instructions

22 | 25 |
26 |
27 |

28 | Below you are shown a snippet of a conversation between two entities. Your task is to decide for each entity whether it is a bot or a human.
29 | Read each message carefully and then make your decision. In case you are not sure, select the "Unsure" option.
30 |

31 |

After your decision, rate the entities regarding the following features by selecting which entity performs better:

32 |

33 | Fluency: Which entity's language is more fluent and grammatically correct? 34 |

35 |

36 | Specificity: Which entity's responses are more specific and explicit in the
37 | given context? An answer is specific if it can be given only in the current context.
38 | (If one says "I love tennis", and the other responds "That is nice!" or "What's your favorite
39 | food?", then this is NOT specific. However, if the response is "I like Roger Federer!", it is
40 | specific as it explicitly refers to the tennis context.)
41 |
42 |

43 |

44 | Sensible: Which entity's responses are more sensible? If the answer seems
45 | confusing, illogical, contradictory, or factually wrong, then it is NOT sensible.

47 |

48 | After your decision, click on the Submit button. Then the next conversation in
49 | the package is loaded.
50 | After you have finished the last conversation in the batch, you will be prompted with a submission
51 | code, which you have to paste into the mTurk form.
52 |

53 |
54 |
55 | ) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /templates/static/js/components/Leaderboard.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import ApiClient from '../api_client'; 3 | 4 | export default class Leaderboard extends Component { 5 | constructor(props) { 6 | super(props); 7 | this.state = {leaderboard: {}}; 8 | } 9 | 10 | componentDidMount() { 11 | this.loadLeaderboard(); 12 | } 13 | 14 | componentWillReceiveProps(nextProps) { 15 | this.loadLeaderboard(); 16 | } 17 | 18 | loadLeaderboard() { 19 | const apiClient = new ApiClient(); // No API key required for this endpoint! 20 | 21 | apiClient.getLeaderboard().then(res => { 22 | this.setState({leaderboard: res.data}); 23 | }); 24 | } 25 | 26 | render() { 27 | let tables = []; 28 | let leaderboard = this.state.leaderboard; 29 | 30 | return ( 31 | 32 | {this.renderLeaderboardTable(leaderboard)} 33 | 34 | ); 35 | } 36 | 37 | renderLeaderboardTable(currentLeaderboard) { 38 | let filterededArrData = Object.entries(currentLeaderboard).filter(entry => entry[1].number_of_annotations >= 20); 39 | 40 | //filterededArrData = arrData.filter(entry => entry[1].number_of_annotations > 20); 41 | 42 | let tableBody = filterededArrData.map(e => { 43 | return ( 44 | 45 | {e[1].user_name} 46 | {e[1].number_of_annotations} 47 | {e[1].score.toFixed(4)} 48 | {e[1].elo_score.toFixed(4)} 49 | 50 | ); 51 | }); 52 | 53 | return ( 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | {tableBody} 64 |
User NameNumber of AnnotationsScoreELO Score
65 | ) 66 | } 67 | } -------------------------------------------------------------------------------- /templates/static/js/components/MaxPackages.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | 3 | export default class MaxPackages extends Component { 4 | constructor(props) { 5 | super(props); 6 | } 7 | 8 | render() { 9 | return ( 10 |
11 |
12 |
13 |
14 |

Sorry, you have already annotated the maximum number of packages: {this.props.ann_pkgs} / {this.props.max_allowed}

15 |
16 |
17 |
18 |
19 | ) 20 | } 21 | 22 | } -------------------------------------------------------------------------------- /templates/static/js/components/RandomCode.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import Leaderboard from "./Leaderboard"; 3 | import ApiClient from '../api_client'; 4 | 5 | export default class SubmittedForm extends Component { 6 | constructor(props) { 7 | super(props); 8 | } 9 | 10 | render() { 11 | if(!this.props.render){ 12 | return ( 13 |
14 | ); 15 | } 16 | 17 | return ( 18 |
19 |
20 |
21 |
22 |

Thank you for your submission. Your code is: {this.props.random_code}

23 |
24 |
25 |
26 |
27 | ) 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /templates/static/js/components/SingleAnnotationForm.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import ApiClient from "../api_client"; 3 | import EntityForm from "./EntityForm"; 4 | import FinalRatingForm from "./FinalRatingForm"; 5 | 6 | const defaultValue = 50; 7 | 8 | export default class SingleAnnotationForm extends Component { 9 | constructor(props) { 10 | super(props); 11 | this.state = { 12 | entity0_annotation: {}, 13 | entity1_annotation: {}, 14 | decided0: false, 15 | decided1: false, 16 | rating_provided: false 17 | }; 18 | 19 | this.handleSubmit = this.handleSubmit.bind(this); 20 | this.handleCheckChange = this.handleCheckChange.bind(this); 21 | this.handleUserName = this.handleUserName.bind(this); 22 | this.handleUserNameFocus = this.handleUserNameFocus.bind(this); 23 | this.handleRatingSubmit = this.handleRatingSubmit.bind(this); 24 | this.sendData = this.sendData.bind(this); 25 | } 26 | 27 | sendData() { 28 | if (this.state.decided0 && this.state.decided1 && this.state.rating_provided) { 29 | const apiClient = new ApiClient(); 30 | let decision_data = {}; 31 | decision_data.entity0_annotation = this.state.entity0_annotation; 32 | decision_data.entity1_annotation = this.state.entity1_annotation; 33 | decision_data.random_number = this.props.random_code; 34 | decision_data.convo_id = this.props.dialogue_id; 35 | decision_data.start_time = this.props.start_time; 36 | decision_data.user_name = this.props.user_name; 37 | decision_data.package_id = this.props.package_id; 38 | apiClient.postDecisionForDialogue(decision_data); 39 | this.props.random_dialogue_callback(); 40 | this.setState({decided0: false, decided1: false, rating_provided: false}) 41 | } 42 | } 43 | 44 | handleSubmit(data) { 45 | if (data.entity_number === 0) { 46 | this.setState({entity0_annotation: data, decided0: true}); 47 | } else { 48 | this.setState({entity1_annotation: data, decided1: true}); 49 | } 50 | this.props.entity_decision_cb(data.entity_number); 51 | } 52 | 53 | handleRatingSubmit(data) { 54 | data.entity0_annotation.is_human = this.state.entity0_annotation.is_human; 55 | data.entity1_annotation.is_human = this.state.entity1_annotation.is_human; 56 | 57 | data.entity0_annotation.decision_turn = this.state.entity0_annotation.decision_turn; 58 | data.entity1_annotation.decision_turn = this.state.entity1_annotation.decision_turn; 59 | 60 | data.entity0_annotation.entity_number = this.state.entity0_annotation.entity_number; 61 | data.entity1_annotation.entity_number = this.state.entity1_annotation.entity_number; 62 | this.setState({ 63 | entity0_annotation: data.entity0_annotation, 64 | entity1_annotation: data.entity1_annotation, 65 | rating_provided: true 66 | }, this.sendData); 67 | this.props.final_rating_cb(this.state.entity0_annotation.is_human, this.state.entity1_annotation.is_human) 68 | } 69 | 70 | handleCheckChange(event, newValue) { 71 | let tid = event.target.id; 72 | if (tid === 'ent0checkbox') { 73 | this.checked0 = newValue; 74 | } else if (tid === 'ent1checkbox') { 75 | this.checked1 = newValue; 76 | } 77 | } 78 | 79 | handleUserName(event, newValue) { 80 | console.log("new value", event.target.value); 81 | this.setState({user_name: event.target.value}); 82 | localStorage.setItem('user_name', this.state.user_name); 83 | } 84 | 85 | 86 | handleUserNameFocus() { 87 | this.setState({user_name: 
""}); 88 | localStorage.setItem('user_name', this.state.user_name); 89 | } 90 | 91 | render() { 92 | 93 | let submission_cls0 = 'submission_div'; 94 | if (this.props.decision_show0) { 95 | submission_cls0 = '' 96 | } 97 | 98 | let submission_cls1 = 'submission_div'; 99 | if (this.props.decision_show1) { 100 | submission_cls1 = '' 101 | } 102 | 103 | let submission_cls_rating = 'submission_div'; 104 | if (this.props.show_final_rating) { 105 | submission_cls_rating = '' 106 | } 107 | 108 | if (this.props.show_final_rating) { 109 | return ( 110 |
111 | 112 | 113 | 120 | 121 | 122 | 129 | 130 | 131 | 132 | 133 | 136 | 137 | 138 | {/* markup stripped in this dump — see the hedged sketch below */}
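{/* Hedged sketch only — the original JSX for this branch was lost in the dump. Judging from
    the imports (EntityForm, FinalRatingForm), the submission_cls* toggles, and the
    handleSubmit / handleRatingSubmit callbacks defined above, this branch most likely
    rendered something along these lines; every prop name other than the handlers and
    variables defined in this class is an assumption:

        <div>
            <div className={submission_cls0}>
                <EntityForm entity_number={0} submit_cb={this.handleSubmit} ... />
            </div>
            <div className={submission_cls1}>
                <EntityForm entity_number={1} submit_cb={this.handleSubmit} ... />
            </div>
            <div className={submission_cls_rating}>
                <FinalRatingForm submit_cb={this.handleRatingSubmit} ... />
            </div>
        </div>
*/}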
139 | 140 | ); 141 | } else { 142 | return (
143 | 144 | 145 | 152 | 153 | 154 | 161 | 162 | 163 | {/* markup stripped in this dump — presumably the same two EntityForm panels as above, without the FinalRatingForm */}
) 164 | } 165 | } 166 | 167 | } -------------------------------------------------------------------------------- /templates/static/js/components/legacy/RandomCode.jsx: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import Leaderboard from "./Leaderboard"; 3 | import ApiClient from '../api_client'; 4 | 5 | export default class SubmittedForm extends Component { 6 | constructor(props) { 7 | super(props); 8 | 9 | this.computeScore = this.computeScore.bind(this); 10 | } 11 | 12 | computeScore() { 13 | let score = 0; 14 | if (this.props.is_human0 === this.props.is_human0_pred) { 15 | score += 0.5; 16 | } 17 | if (this.props.is_human1 === this.props.is_human1_pred) { 18 | score += 0.5; 19 | } 20 | score -= 0.3 * (this.props.decision_turn0 / this.props.convo_len); 21 | score -= 0.3 * (this.props.decision_turn1 / this.props.convo_len); 22 | return score 23 | } 24 | 25 | render() { 26 | if(!this.props.render){ 27 | return ( 28 |
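{/* line 28: markup stripped in this dump — presumably just an empty placeholder element rendered while this.props.render is false */}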
29 | ); 30 | } 31 | 32 | let score_color = ''; 33 | let score = this.computeScore(); 34 | if (score <= 0.5) { 35 | score_color = 'red_score'; 36 | } else { 37 | score_color = 'green_score'; 38 | } 39 | 40 | let show_tips = 'submission_div'; 41 | if (this.props.final_score < 0.75 && this.props.turn_penalty < 0.2) { 42 | show_tips = '' 43 | } 44 | 45 | return ( 46 |
47 | 48 | 49 | {/* wrapping markup stripped in this dump; surviving text nodes follow */}
50 | Thank You for your submission. Your code is: {this.props.random_code}
51 | 52 | 53 |
54 | Your Score for this Conversation: {score.toFixed(3)}
55 | 56 | 57 | 58 |
59 | Your Scores seem to be low. Try to open more turns, you only
60 | open {(this.props.turn_penalty*100).toFixed(3)} percent of the turns, which is a bit low.
62 | 63 | 64 | 65 | 66 |
67 | Leaderboard
68 | 69 | 70 | 71 |
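{/* Worked example for computeScore() above (illustrative numbers, not taken from the data):
    if both entities are identified correctly (0.5 + 0.5) and both decisions fall on turn 2
    of a 10-turn conversation, the score is 1.0 - 0.3*(2/10) - 0.3*(2/10) = 0.88, which is
    rendered with the 'green_score' class since it exceeds 0.5. */}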
72 | ) 73 | } 74 | 75 | } -------------------------------------------------------------------------------- /templates/static/js/components/legacy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jderiu/spot-the-bot-code/6b6ffa1c8716b0a6b1d8bfd795efe4043e8e2d28/templates/static/js/components/legacy/__init__.py -------------------------------------------------------------------------------- /templates/static/js/index.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import DialougeDomainFilter from "./components/DialougeDomainFilter"; 4 | import DialogueContainer from "./components/DialougeContainer"; 5 | import DialoguePackageContainer from "./components/DialougePackageContainer"; 6 | import Instructions from "./components/Instructions"; 7 | import Leaderboard from "./components/Leaderboard"; 8 | 9 | if(window.convo_id === 0 ||window.convo_id == undefined){ 10 | if(window.show_leaderboard && !window.full_convo){ 11 | ReactDOM.render(, document.getElementById('content')); 12 | }else if(window.pkg_id){ 13 | ReactDOM.render(, document.getElementById('infobox-container')); 14 | if(window.segmented === true){ 15 | ReactDOM.render(, document.getElementById('content')); 16 | }else{ 17 | ReactDOM.render(, document.getElementById('content')); 18 | } 19 | }else{ 20 | ReactDOM.render(, document.getElementById('infobox-container')); 21 | ReactDOM.render(, document.getElementById('content')); 22 | } 23 | }else{ 24 | if(window.full_convo){ 25 | ReactDOM.render(, document.getElementById('infobox-container')); 26 | ReactDOM.render(, document.getElementById('content')); 27 | }else{ 28 | ReactDOM.render(, document.getElementById('infobox-container')); 29 | ReactDOM.render(, document.getElementById('content')); 30 | } 31 | 32 | 33 | } 34 | 35 | -------------------------------------------------------------------------------- /templates/static/js/routes.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { HashRouter, Route, hashHistory } from 'react-router-dom'; 3 | import DialougeDomainFilter from "./components/DialougeDomainFilter"; 4 | import DialogueContainer from "./components/DialougeContainer"; 5 | 6 | // import more components 7 | export default ( 8 | 9 |
10 | 11 | } /> 12 |
13 |
14 | ); 15 | 16 | // if(window.convo_id === 0 ||window.convo_id == undefined){ 17 | // ReactDOM.render(, document.getElementById('content')); 18 | // }else{ 19 | // ReactDOM.render(, document.getElementById('infobox-container')); 20 | // ReactDOM.render(, document.getElementById('content')); 21 | // } 22 | 23 | -------------------------------------------------------------------------------- /templates/static/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hello_template", 3 | "version": "1.0.0", 4 | "description": "A template for creating a full stack wep app with Flask, NPM, Webpack, and Reactjs", 5 | "main": "index.jsx", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "build": "webpack -p --progress --config webpack.config.js", 9 | "dev-build": "webpack --progress -d --config webpack.config.js", 10 | "watch": "webpack --progress -d --config webpack.config.js --watch" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/Eyongkevin/portfolio_template.git" 15 | }, 16 | "keywords": [ 17 | "portfolio", 18 | "template", 19 | "python", 20 | "react", 21 | "npm", 22 | "webpack" 23 | ], 24 | "author": "Eyong Kevin", 25 | "license": "ISC", 26 | "bugs": { 27 | "url": "https://github.com/Eyongkevin/portfolio_template/issues" 28 | }, 29 | "homepage": "https://github.com/Eyongkevin/portfolio_template#readme", 30 | "devDependencies": { 31 | "babel-core": "^6.26.3", 32 | "babel-loader": "^7.1.5", 33 | "babel-preset-es2015": "^6.24.1", 34 | "babel-preset-react": "^6.24.1", 35 | "react": "^16.12.0", 36 | "react-dom": "^16.12.0", 37 | "webpack": "^4.23.1", 38 | "webpack-cli": "^3.3.11" 39 | }, 40 | "dependencies": { 41 | "@material-ui/core": "^4.9.4", 42 | "axios": "^0.19.2", 43 | "history": "^4.7.2", 44 | "react-bootstrap-range-slider": "^0.3.0", 45 | "react-json-tree": "^0.11.2", 46 | "react-router-dom": "^4.3.1", 47 | "react-scroll": "^1.7.16", 48 | "react-slider": "^1.0.3" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /templates/static/webpack.config.js: -------------------------------------------------------------------------------- 1 | const webpack = require('webpack'); 2 | const resolve = require('path').resolve; 3 | 4 | const config = { 5 | devtool: 'eval-source-map', 6 | entry: __dirname + '/js/index.jsx', 7 | output:{ 8 | path: resolve('../public'), 9 | filename: 'bundle.js', 10 | publicPath: resolve('../public') 11 | }, 12 | resolve: { 13 | extensions: ['.js','.jsx','.css'] 14 | }, 15 | module: { 16 | rules: [ 17 | { 18 | test: /\.jsx?/, 19 | loader: 'babel-loader', 20 | exclude: /node_modules/ 21 | }, 22 | { 23 | test: /\.css$/, 24 | loader: 'style-loader!css-loader?modules' 25 | }] 26 | } 27 | }; 28 | module.exports = config; 29 | --------------------------------------------------------------------------------