├── codalab
│   ├── scoring_program
│   ├── competition
│   │   ├── codwoe-logo.png
│   │   ├── data.html
│   │   ├── terms_and_conditions.html
│   │   ├── overview.html
│   │   ├── evaluation.html
│   │   └── competition.yaml
│   ├── README.md
│   └── Makefile
├── code
│   ├── metadata
│   ├── codwoe_entrypoint.py
│   ├── check_output.py
│   ├── models.py
│   ├── data.py
│   ├── revdict.py
│   ├── defmod.py
│   └── score.py
├── .gitignore
├── baseline_archs
│   ├── code
│   │   ├── metadata
│   │   ├── codwoe_entrypoint.py
│   │   ├── check_output.py
│   │   ├── score.py
│   │   ├── data.py
│   │   ├── revdict.py
│   │   ├── models.py
│   │   └── defmod.py
│   └── README.md
├── rankings
│   ├── final_rankings
│   │   ├── revdict-electra_rankings-per-users.csv
│   │   ├── defmod_rankings-per-users.csv
│   │   ├── revdict-sgns_rankings-per-users.csv
│   │   └── revdict-char_rankings-per-users.csv
│   ├── make_rankings_defmod.py
│   ├── make_rankings_char.py
│   ├── make_rankings_electra.py
│   ├── make_rankings_sgns.py
│   ├── README.md
│   └── submission_ranks
│       └── results_revdict-electra-rankings.csv
├── requirements.txt
├── docker
│   └── Dockerfile
├── data
│   └── README.md
└── README.md
--------------------------------------------------------------------------------
/codalab/scoring_program:
--------------------------------------------------------------------------------
../code/
--------------------------------------------------------------------------------
/codalab/competition/codwoe-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimotheeMickus/codwoe/HEAD/codalab/competition/codwoe-logo.png
--------------------------------------------------------------------------------
/code/metadata:
--------------------------------------------------------------------------------
command: python3 $program/codwoe_entrypoint.py score $input/res/ --reference_files_dir $input/ref/ --output_file $output
description: run scoring program on any data
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
.venv/
.tb/
data/*
*.json
*.pt
*.zip
scores.txt
!data/README.md
!data/trial-data_all.zip
!data/train-data_all.zip
--------------------------------------------------------------------------------
/baseline_archs/code/metadata:
--------------------------------------------------------------------------------
command: python3 $program/codwoe_entrypoint.py score $input/res/ --reference_files_dir $input/ref/ --output_file $output
description: run scoring program on any data
--------------------------------------------------------------------------------
/codalab/README.md:
--------------------------------------------------------------------------------
# CodaLab website configuration

Adapted from the Sample CodaLab competition for SemEval.

The scoring program is a symlink to the code directory in the root of this
repository.

The reference data is not indicated by a field in the YAML.
--------------------------------------------------------------------------------
/codalab/Makefile:
--------------------------------------------------------------------------------
competition/scoring_program.zip: scoring_program/*
	cd scoring_program && zip ./scoring_program.zip metadata *.py && cd .. && mv scoring_program/scoring_program.zip ./competition/

competition.zip: competition/* competition/scoring_program.zip
	cd competition && zip ../competition.zip * && cd ..

submission.zip: submission/*
	cd submission && zip ../submission.zip * && cd ..
--------------------------------------------------------------------------------
/rankings/final_rankings/revdict-electra_rankings-per-users.csv:
--------------------------------------------------------------------------------
user,EN MSE,EN cos,EN rank,FR MSE,FR cos,FR rank,RU MSE,RU cos,RU rank,Avg EN,Avg FR,Avg RU,Rank EN,Rank FR,Rank RU
Locchi,1.0,13.0,10.0,,,,,,,8.0,,,3,,
Nihed_Bendahman_,7.0,11.0,3.0,6.0,7.0,3.0,8.0,8.0,8.0,7.0,5.333333333333333,8.0,2,2,4
WENGSYX,13.0,4.0,9.0,11.0,6.0,11.0,1.0,4.0,12.0,8.666666666666666,9.333333333333334,5.666666666666667,4,4,2
aardoiz,,,,,,,,,,,,,,,
chlrbgus321,,,,,,,,,,,,,,,
cunliang.kong,,,,,,,,,,,,,,,
dkorenci,16.0,18.0,1.0,12.0,14.0,1.0,12.0,7.0,1.0,11.666666666666666,9.0,6.666666666666667,5,3,3
emukans,,,,,,,,,,,,,,,
guntis,,,,,,,,,,,,,,,
lukechan1231,,,,,,,,,,,,,,,
pzchen,2.0,2.0,12.0,2.0,2.0,10.0,3.0,2.0,10.0,5.333333333333333,4.666666666666667,5.0,1,1,1
talent404,,,,,,,,,,,,,,,
the0ne,10.0,5.0,23.0,,,,,,,12.666666666666666,,,6,,
tthhanh,,,,,,,,,,,,,,,
zhwa3087,,,,,,,,,,,,,,,
--------------------------------------------------------------------------------
/rankings/make_rankings_defmod.py:
--------------------------------------------------------------------------------
import pandas as pd

METRICS = ['MvSc.', 'S-BLEU', 'L-BLEU']
LANGS = ['EN', 'ES', 'FR', 'IT', 'RU']

df = pd.read_csv('res_defmod.csv')

def get_sorted_vals(colname):
    return sorted(df[colname].dropna(), reverse=True)

for colname in [f"{lang} {metric}" for lang in LANGS for metric in METRICS]:
    sorted_vals = get_sorted_vals(colname)
    def float_to_rank(cell):
        if pd.isna(cell): return cell
        return sum(i >= cell for i in sorted_vals)
    df[colname] = df[colname].apply(float_to_rank)
df.to_csv('results_defmod.csv', index=False)
df_ranks = df.groupby('user').min()
for lang in LANGS:
    def get_mean_rank(row):
        metrics = [row[f"{lang} {metric}"] for metric in METRICS]
        if any(map(pd.isna, metrics)): return pd.NA
        return sum(metrics) / len(metrics)
    df_ranks[f"Rank {lang}"] = df_ranks.apply(get_mean_rank, axis=1)
del df_ranks['Date']
del df_ranks['filename']
df_ranks.to_csv('defmod_rankings-per-users.csv')
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
absl-py==0.12.0
cachetools==4.2.2
certifi==2020.12.5
chardet==4.0.0
click==8.0.1
cycler==0.10.0
filelock==3.0.12
google-auth==1.30.0
google-auth-oauthlib==0.4.4
grpcio==1.37.1
huggingface-hub==0.0.12
idna==2.10
joblib==1.0.1
kiwisolver==1.3.1
Markdown==3.3.4
matplotlib==3.4.2
moverscore==1.0.3
nltk==3.6.7
numpy>=1.20.3
oauthlib==3.1.0
packaging==21.0
Pillow==8.3.0
portalocker==2.3.0
protobuf==3.17.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyemd
pyparsing==2.4.7
python-dateutil==2.8.1
PyYAML==5.4.1
regex==2022.1.18
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7.2
sacremoses==0.0.45
sentencepiece==0.1.96
six==1.16.0
tensorboard==2.5.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tokenizers==0.8.1rc2
torch==1.8.1
tqdm==4.60.0
transformers==3.1.0
typing==3.7.4.3
typing-extensions==3.10.0.0
urllib3==1.26.4
Werkzeug==2.0.1
--------------------------------------------------------------------------------
/rankings/make_rankings_char.py:
--------------------------------------------------------------------------------
import pandas as pd

METRICS = {
    'MSE': False,
    'cos': True,
    'rank': False,
}
LANGS = ['EN', 'ES', 'FR', 'IT', 'RU']

df = pd.read_csv('submission_scores/res_revdict-char.csv')

def get_sorted_vals(colname):
    return sorted(df[colname].dropna(), reverse=False)

for lang, metric in [(lang, metric) for lang in LANGS for metric in METRICS]:
    colname = f"{lang} {metric}"
    sorted_vals = get_sorted_vals(colname)
    to_maximize = METRICS[metric]
    def float_to_rank(cell):
        if pd.isna(cell):
            return cell
        if to_maximize:
            return sum(i >= cell for i in sorted_vals)
        return sum(i <= cell for i in sorted_vals)

    df[colname] = df[colname].apply(float_to_rank)
df.to_csv('submission_ranks/results_revdict-char-rankings.csv', index=False)
df_ranks = df.groupby('user').min()
for lang in LANGS:
    def get_mean_rank(row):
        metrics = [row[f"{lang} {metric}"] for metric in METRICS]
        if any(map(pd.isna, metrics)): return pd.NA
        return sum(metrics) / len(metrics)
    df_ranks[f"Rank {lang}"] = df_ranks.apply(get_mean_rank, axis=1)
del df_ranks['Date']
del df_ranks['filename']
df_ranks.to_csv('final_rankings/revdict-char_rankings-per-users.csv')
--------------------------------------------------------------------------------
/rankings/make_rankings_electra.py:
--------------------------------------------------------------------------------
import pandas as pd

METRICS = {
    'MSE': False,
    'cos': True,
    'rank': False,
}
LANGS = ['EN', 'FR', 'RU']

df = pd.read_csv('submission_scores/res_revdict-electra.csv')

def get_sorted_vals(colname):
    return sorted(df[colname].dropna(), reverse=False)

for lang, metric in [(lang, metric) for lang in LANGS for metric in METRICS]:
    colname = f"{lang} {metric}"
    sorted_vals = get_sorted_vals(colname)
    to_maximize = METRICS[metric]
    def float_to_rank(cell):
        if pd.isna(cell):
            return cell
        if to_maximize:
            return sum(i >= cell for i in sorted_vals)
        return sum(i <= cell for i in sorted_vals)

    df[colname] = df[colname].apply(float_to_rank)
df.to_csv('submission_ranks/results_revdict-electra-rankings.csv', index=False)
df_ranks = df.groupby('user').min()
for lang in LANGS:
    def get_mean_rank(row):
        metrics = [row[f"{lang} {metric}"] for metric in METRICS]
        if any(map(pd.isna, metrics)): return pd.NA
        return sum(metrics) / len(metrics)
    df_ranks[f"Rank {lang}"] = df_ranks.apply(get_mean_rank, axis=1)
del df_ranks['Date']
del df_ranks['filename']
df_ranks.to_csv('final_rankings/revdict-electra_rankings-per-users.csv')
--------------------------------------------------------------------------------
/rankings/make_rankings_sgns.py:
--------------------------------------------------------------------------------
import pandas as pd

METRICS = {
    'MSE': False,
    'cos': True,
    'rank': False,
}
LANGS = ['EN', 'ES', 'FR', 'IT', 'RU']

df = pd.read_csv('submission_scores/res_revdict-sgns.csv')

def get_sorted_vals(colname):
    return sorted(df[colname].dropna(), reverse=False)

for lang, metric in [(lang, metric) for lang in LANGS for metric in METRICS]:
    colname = f"{lang} {metric}"
    sorted_vals = get_sorted_vals(colname)
    to_maximize = METRICS[metric]
    def float_to_rank(cell):
        if pd.isna(cell):
            return cell
        if to_maximize:
            return sum(i >= cell for i in sorted_vals)
        return sum(i <= cell for i in sorted_vals)

    df[colname] = df[colname].apply(float_to_rank)
df.to_csv('submission_ranks/results_revdict-sgns-rankings.csv', index=False)
df_ranks = df.groupby('user').min()
for lang in LANGS:
    def get_mean_rank(row):
        metrics = [row[f"{lang} {metric}"] for metric in METRICS]
        if any(map(pd.isna, metrics)): return pd.NA
        return sum(metrics) / len(metrics)
    df_ranks[f"Rank {lang}"] = df_ranks.apply(get_mean_rank, axis=1)
del df_ranks['Date']
del df_ranks['filename']
df_ranks.to_csv('final_rankings/revdict-sgns_rankings-per-users.csv')
--------------------------------------------------------------------------------
/code/codwoe_entrypoint.py:
--------------------------------------------------------------------------------
import defmod, revdict, check_output, score

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="demo script for participants")
    subparsers = parser.add_subparsers(dest="command", required=True)
    parser_defmod = defmod.get_parser(
        parser=subparsers.add_parser(
            "defmod", help="run a definition modeling baseline"
        )
    )
    parser_revdict = revdict.get_parser(
        parser=subparsers.add_parser(
            "revdict", help="run a reverse dictionary baseline"
        )
    )
    parser_check_output = check_output.get_parser(
        parser=subparsers.add_parser(
            "check-format", help="check the format of a submission file"
        )
    )
    parser_score = score.get_parser(
        parser=subparsers.add_parser("score", help="evaluate a submission")
    )
    args = parser.parse_args()
    if args.command == "defmod":
        defmod.main(args)
    elif args.command == "revdict":
        revdict.main(args)
    elif args.command == "check-format":
        check_output.main(args.submission_file)
    elif args.command == "score":
        score.main(args)
--------------------------------------------------------------------------------
/baseline_archs/code/codwoe_entrypoint.py:
--------------------------------------------------------------------------------
import defmod, revdict, check_output, score

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="demo script for participants")
    subparsers = parser.add_subparsers(dest="command", required=True)
    parser_defmod = defmod.get_parser(
        parser=subparsers.add_parser(
            "defmod", help="run a definition modeling baseline"
        )
    )
    parser_revdict = revdict.get_parser(
        parser=subparsers.add_parser(
            "revdict", help="run a reverse dictionary baseline"
        )
    )
    parser_check_output = check_output.get_parser(
        parser=subparsers.add_parser(
            "check-format", help="check the format of a submission file"
        )
    )
    parser_score = score.get_parser(
        parser=subparsers.add_parser("score", help="evaluate a submission")
    )
    args = parser.parse_args()
    if args.command == "defmod":
        defmod.main(args)
    elif args.command == "revdict":
        revdict.main(args)
    elif args.command == "check-format":
        check_output.main(args.submission_file)
    elif args.command == "score":
        score.main(args)
--------------------------------------------------------------------------------
/rankings/final_rankings/defmod_rankings-per-users.csv:
--------------------------------------------------------------------------------
user,EN MvSc.,EN S-BLEU,EN L-BLEU,ES MvSc.,ES S-BLEU,ES L-BLEU,FR MvSc.,FR S-BLEU,FR L-BLEU,IT MvSc.,IT S-BLEU,IT L-BLEU,RU MvSc.,RU S-BLEU,RU L-BLEU,Avg EN,Avg ES,Avg FR,Avg IT,Avg RU,Rank EN,Rank ES,Rank FR,Rank IT,Rank RU
Locchi,58.0,43.0,44.0,27.0,28.0,28.0,,,,15.0,28.0,28.0,,,,48.333333333333336,27.666666666666668,,23.666666666666668,,8,6,,7,
Nihed_Bendahman_,,,,,,,,,,,,,,,,,,,,,,,,,
WENGSYX,76.0,75.0,75.0,28.0,29.0,29.0,31.0,32.0,32.0,28.0,13.0,12.0,37.0,36.0,36.0,75.33333333333333,28.666666666666668,31.666666666666668,17.666666666666668,36.333333333333336,9,7,6,6,6
aardoiz,,,,,,,,,,,,,,,,,,,,,,,,,
chlrbgus321,,,,,,,,,,,,,,,,,,,,,,,,,
cunliang.kong,2.0,9.0,9.0,2.0,13.0,14.0,12.0,6.0,4.0,2.0,2.0,2.0,2.0,4.0,8.0,6.666666666666667,9.666666666666666,7.333333333333333,2.0,4.666666666666667,3,2,3,1,2
dkorenci,12.0,1.0,2.0,6.0,1.0,1.0,2.0,2.0,8.0,7.0,15.0,15.0,18.0,18.0,18.0,5.0,2.6666666666666665,4.0,12.333333333333334,18.0,2,1,1,5,5
emukans,15.0,25.0,27.0,3.0,21.0,21.0,1.0,13.0,14.0,6.0,14.0,14.0,8.0,15.0,15.0,22.333333333333332,15.0,9.333333333333334,11.333333333333334,12.666666666666666,5,4,4,4,3
guntis,26.0,29.0,28.0,,,,,,,,,,,,,27.666666666666668,,,,,6,,,,
lukechan1231,34.0,50.0,53.0,21.0,23.0,23.0,4.0,21.0,21.0,5.0,7.0,7.0,5.0,17.0,17.0,45.666666666666664,22.333333333333332,15.333333333333334,6.333333333333333,13.0,7,5,5,3,4
pzchen,5.0,12.0,12.0,5.0,18.0,18.0,18.0,1.0,2.0,4.0,4.0,4.0,4.0,2.0,2.0,9.666666666666666,13.666666666666666,7.0,4.0,2.6666666666666665,4,3,2,2,1
talent404,3.0,3.0,1.0,,,,,,,,,,,,,2.3333333333333335,,,,,1,,,,
the0ne,,,,,,,,,,,,,,,,,,,,,,,,,
tthhanh,,,,,,,,,,,,,,,,,,,,,,,,,
zhwa3087,,,,,,,,,,,,,,,,,,,,,,,,,
--------------------------------------------------------------------------------
/rankings/final_rankings/revdict-sgns_rankings-per-users.csv:
--------------------------------------------------------------------------------
user,EN MSE,EN cos,EN rank,ES MSE,ES cos,ES rank,FR MSE,FR cos,FR rank,IT MSE,IT cos,IT rank,RU MSE,RU cos,RU rank,Unnamed: 19,Avg EN,Avg ES,Avg FR,Avg IT,Avg RU,Rank EN,Rank ES,Rank FR,Rank IT,Rank RU
Locchi,8.0,14.0,17.0,,,,,,,8.0,13.0,16.0,,,,False,13.0,,,12.333333333333334,,5,,,4,
Nihed_Bendahman_,14.0,26.0,8.0,15.0,19.0,10.0,14.0,20.0,13.0,18.0,16.0,14.0,10.0,15.0,12.0,False,16.0,14.666666666666666,15.666666666666666,16.0,12.333333333333334,6,5,4,6,4
WENGSYX,2.0,6.0,12.0,1.0,6.0,8.0,4.0,5.0,7.0,4.0,10.0,12.0,1.0,1.0,7.0,False,6.666666666666667,5.0,5.333333333333333,8.666666666666666,3.0,2,2,2,3,1
aardoiz,,,,16.0,1.0,1.0,,,,,,,,,,False,,6.0,,,,,3,,,
chlrbgus321,1.0,4.0,9.0,,,,,,,,,,,,,True,4.666666666666667,,,,,1,,,,
cunliang.kong,,,,,,,,,,,,,,,,False,,,,,,,,,,
dkorenci,28.0,1.0,1.0,7.0,2.0,2.0,11.0,1.0,1.0,7.0,1.0,1.0,11.0,2.0,1.0,False,10.0,3.6666666666666665,4.333333333333333,3.0,4.666666666666667,4,1,1,1,2
emukans,,,,,,,,,,,,,,,,False,,,,,,,,,,
guntis,,,,,,,,,,,,,,,,False,,,,,,,,,,
lukechan1231,,,,,,,,,,,,,,,,False,,,,,,,,,,
pzchen,4.0,8.0,11.0,3.0,10.0,12.0,1.0,8.0,9.0,1.0,2.0,8.0,2.0,7.0,8.0,False,7.666666666666667,8.333333333333334,6.0,3.6666666666666665,5.666666666666667,3,4,3,2,3
talent404,,,,,,,,,,,,,,,,True,,,,,,,,,,
the0ne,16.0,22.0,30.0,,,,,,,,,,,,,False,22.666666666666668,,,,,8,,,,
tthhanh,19.0,27.0,24.0,17.0,26.0,22.0,17.0,18.0,22.0,21.0,25.0,22.0,21.0,25.0,22.0,False,23.333333333333332,21.666666666666668,19.0,22.666666666666668,22.666666666666668,9,7,6,7,6
zhwa3087,22.0,15.0,13.0,11.0,17.0,19.0,12.0,17.0,21.0,9.0,14.0,15.0,15.0,11.0,13.0,False,16.666666666666668,15.666666666666666,16.666666666666668,12.666666666666666,13.0,7,6,5,5,5
--------------------------------------------------------------------------------
/rankings/final_rankings/revdict-char_rankings-per-users.csv:
--------------------------------------------------------------------------------
user,EN MSE,EN cos,EN rank,ES MSE,ES cos,ES rank,FR MSE,FR cos,FR rank,IT MSE,IT cos,IT rank,RU MSE,RU cos,RU rank,Avg EN,Avg ES,Avg FR,Avg IT,Avg RU,Rank EN,Rank ES,Rank FR,Rank IT,Rank RU
Locchi,1.0,1.0,11.0,,,,,,,6.0,7.0,17.0,,,,4.333333333333333,,,10.0,,1,,,4,
Nihed_Bendahman_,7.0,9.0,8.0,7.0,7.0,8.0,9.0,9.0,6.0,8.0,9.0,10.0,7.0,8.0,14.0,8.0,7.333333333333333,8.0,9.0,9.666666666666666,2,2,2,3,4
WENGSYX,24.0,21.0,12.0,20.0,9.0,23.0,18.0,17.0,27.0,21.0,21.0,18.0,22.0,19.0,15.0,19.0,17.333333333333332,20.666666666666668,20.0,18.666666666666668,7,5,5,6,5
aardoiz,,,,,,,,,,,,,,,,,,,,,,,,,
chlrbgus321,,,,,,,,,,,,,,,,,,,,,,,,,
cunliang.kong,,,,,,,,,,,,,,,,,,,,,,,,,
dkorenci,17.0,22.0,1.0,10.0,16.0,1.0,12.0,15.0,2.0,12.0,10.0,1.0,8.0,11.0,1.0,13.333333333333334,9.0,9.666666666666666,7.666666666666667,6.666666666666667,4,3,4,2,2
emukans,,,,,,,,,,,,,,,,,,,,,,,,,
guntis,,,,,,,,,,,,,,,,,,,,,,,,,
lukechan1231,,,,,,,,,,,,,,,,,,,,,,,,,
pzchen,5.0,6.0,15.0,2.0,3.0,12.0,1.0,1.0,7.0,2.0,2.0,12.0,1.0,1.0,8.0,8.666666666666666,5.666666666666667,3.0,5.333333333333333,3.3333333333333335,3,1,1,1,1
talent404,,,,,,,,,,,,,,,,,,,,,,,,,
the0ne,10.0,3.0,28.0,,,,,,,,,,,,,13.666666666666666,,,,,5,,,,
tthhanh,,,,,,,,,,,,,,,,,,,,,,,,,
zhwa3087,19.0,19.0,10.0,17.0,15.0,6.0,13.0,12.0,1.0,9.0,6.0,16.0,15.0,6.0,6.0,16.0,12.666666666666666,8.666666666666666,10.333333333333334,9.0,6,4,3,5,3
--------------------------------------------------------------------------------
/codalab/competition/data.html:
--------------------------------------------------------------------------------

Data links

Data for the competition is available on our git repository. The complete
datasets will be made available at the end of the evaluation phase (January
31st).

Data format & contents

All datasets are in JSON format. We minimize whitespace to limit memory
consumption. Trial, training and development datasets are shared between the
definition modeling and reverse dictionary tracks: as the two tasks are
converse tasks, the source for either one is the target of the other. Test
datasets will be distinct, and have no overlap between the two tracks.
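For instance, a dataset file can be inspected with a few lines of Python (a
minimal sketch; the file name below is a placeholder, not a file shipped on
this page):

```python
import json

# Load one of the provided dataset files (placeholder path).
with open("en.train.json", "r") as file_handler:
    dataset = json.load(file_handler)

print(len(dataset))        # number of examples in this split
print(sorted(dataset[0]))  # keys available for the first example
```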

Datasets are available for the following five languages: EN, ES, FR, IT, RU.
Datasets are distinct per language. All five languages contain embeddings from
two distinct architectures: "char" (character-based embeddings) and "sgns"
(word2vec skip-gram with negative sampling). EN, FR and RU additionally have a
third type of embedding available, namely "electra" (Transformer-based
contextual embeddings).
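Summed up as a mapping (this is merely a restatement of the paragraph above,
not a file distributed with the task):

```python
# Embedding architectures available per language, as described above.
EMBEDDING_ARCHS = {
    "en": ["char", "sgns", "electra"],
    "es": ["char", "sgns"],
    "fr": ["char", "sgns", "electra"],
    "it": ["char", "sgns"],
    "ru": ["char", "sgns", "electra"],
}
```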

All embeddings for any given language were trained on the same corpus;
embedding training corpora across languages were designed to be comparable.
All computation details will be provided in the task description paper.

The trial dataset is annotated much more richly than the other datasets.
During the shared task, we will only provide gloss, id and embeddings. After
the evaluation phase, we will provide complete datasets, which will also
include the word being defined and its part of speech.

We very strongly encourage participants to reserve the trial dataset for
running manual evaluations of their systems' productions. The presence of a
manual evaluation in system descriptions will be taken into account during the
reviewing process and discussed in the task paper.

License Information

The complete datasets, embedding architectures and embedding models will be
made publicly available after the evaluation phase under a CC-BY-SA license.
Please link to this page and cite our upcoming task description paper if you
use these datasets in your own work.

Dictionary data has been extracted from dumps provided by Sérasset (2014).

54 | -------------------------------------------------------------------------------- /baseline_archs/README.md: -------------------------------------------------------------------------------- 1 | # Baseline scores 2 | 3 | Here are baseline results on the development set for the two tracks, obtained with the architectures described in this sub-directory. 4 | The code here is itself based on the baselines we provided earlier, along with a couple improvements: 5 | - a principled way of selecting hyperparameters (using Bayesian Optimization), 6 | - a sentence-piece retokenization, to ensure the vocabulary is of the same size for all languages, 7 | - a beam-search decoding for the definition modeling pipeline. 8 | 9 | ## Installation 10 | To train these models, you will also need need the `scikit-learn` and `scikit-optimize` libraries, which we used to select hyperparameters. 11 | ```sh 12 | pip3 install scikit-learn==0.24.2 scikit-optimize==0.8.1 13 | ``` 14 | 15 | 16 | ## Scores 17 | For the Reverse Dictionary track results, rows will correspond to different targets. 18 | On the other hand, rows of the Definition Modeling table below correspond to different inputs to the system. 19 | Scores were computed using the scoring script provided in this git (`code/score.py`). 20 | 21 | ### Reverse Dictionary track 22 | 23 | | | MSE | Cosine | Ranking 24 | |------------|--------:|--------:|--------: 25 | | en SGNS | 0.91092 | 0.15132 | 0.49030 26 | | en char | 0.14776 | 0.79006 | 0.50218 27 | | en electra | 1.41287 | 0.84283 | 0.49849 28 | | es SGNS | 0.92996 | 0.20406 | 0.49912 29 | | es char | 0.56952 | 0.80634 | 0.49778 30 | | fr SGNS | 1.14050 | 0.19774 | 0.49052 31 | | fr char | 0.39480 | 0.75852 | 0.49945 32 | | fr electra | 1.15348 | 0.85629 | 0.49784 33 | | it SGNS | 1.12536 | 0.20430 | 0.47692 34 | | it char | 0.36309 | 0.72732 | 0.49663 35 | | ru SGNS | 0.57683 | 0.25316 | 0.49008 36 | | ru char | 0.13498 | 0.82624 | 0.49451 37 | | ru electra | 0.87358 | 0.72086 | 0.49120 38 | 39 | 40 | ### Definition Modeling track 41 | 42 | | | Sense-BLEU | Lemma-BLEU | MoverScore 43 | |------------|-----------:|-----------:|-----------: 44 | | en SGNS | 0.00125 | 0.00250 | 0.10339 45 | | en char | 0.00011 | 0.00022 | 0.08852 46 | | en electra | 0.00165 | 0.00215 | 0.08798 47 | | es SGNS | 0.01536 | 0.02667 | 0.20130 48 | | es char | 0.01505 | 0.02471 | 0.19933 49 | | fr SGNS | 0.00351 | 0.00604 | 0.18478 50 | | fr char | 0.00280 | 0.00706 | 0.18579 51 | | fr electra | 0.00219 | 0.00301 | 0.17391 52 | | it SGNS | 0.02591 | 0.04081 | 0.20527 53 | | it char | 0.00640 | 0.00919 | 0.15902 54 | | ru SGNS | 0.01520 | 0.02112 | 0.34716 55 | | ru char | 0.01313 | 0.01847 | 0.32307 56 | | ru electra | 0.01189 | 0.01457 | 0.33577 57 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && apt-get install -y \ 5 | software-properties-common && \ 6 | rm -rf /var/lib/apt/lists/* 7 | RUN add-apt-repository ppa:deadsnakes/ppa 8 | 9 | # set up python 10 | RUN apt-get update && apt-get install -y \ 11 | apt-transport-https \ 12 | iputils-ping \ 13 | git \ 14 | curl \ 15 | build-essential \ 16 | cmake \ 17 | libhdf5-dev \ 18 | swig \ 19 | wget \ 20 | python3.8 \ 21 | python3.8-venv \ 22 | python3.8-dev \ 23 | python3-pip \ 24 | python3-software-properties 25 | 26 | RUN curl 
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:20.04

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
    software-properties-common && \
    rm -rf /var/lib/apt/lists/*
RUN add-apt-repository ppa:deadsnakes/ppa

# set up python
RUN apt-get update && apt-get install -y \
    apt-transport-https \
    iputils-ping \
    git \
    curl \
    build-essential \
    cmake \
    libhdf5-dev \
    swig \
    wget \
    python3.8 \
    python3.8-venv \
    python3.8-dev \
    python3-pip \
    python3-software-properties

RUN curl https://bootstrap.pypa.io/get-pip.py | python3.8

# Without this Python thinks we're ASCII and unicode chars fail
ENV LANG C.UTF-8

RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1

# install libraries
RUN pip3 install -U --no-cache-dir \
    absl-py==0.12.0 \
    cachetools==4.2.2 \
    certifi==2020.12.5 \
    chardet==4.0.0 \
    click==8.0.1 \
    cycler==0.10.0 \
    filelock==3.0.12 \
    google-auth==1.30.0 \
    google-auth-oauthlib==0.4.4 \
    grpcio==1.37.1 \
    huggingface-hub==0.0.12 \
    idna==2.10 \
    joblib==1.0.1 \
    kiwisolver==1.3.1 \
    Markdown==3.3.4 \
    matplotlib==3.4.2 \
    moverscore==1.0.3 \
    nltk==3.6.7 \
    numpy>=1.20.3 \
    oauthlib==3.1.0 \
    packaging==21.0 \
    Pillow==8.3.0 \
    portalocker==2.3.0 \
    protobuf==3.17.0 \
    pyasn1==0.4.8 \
    pyasn1-modules==0.2.8 \
    pyemd \
    pyparsing==2.4.7 \
    python-dateutil==2.8.1 \
    PyYAML==5.4.1 \
    regex==2022.1.18 \
    requests==2.25.1 \
    requests-oauthlib==1.3.0 \
    rsa==4.7.2 \
    sacremoses==0.0.45 \
    sentencepiece==0.1.96 \
    six==1.16.0 \
    tensorboard==2.5.0 \
    tensorboard-data-server==0.6.1 \
    tensorboard-plugin-wit==1.8.0 \
    tokenizers==0.8.1rc2 \
    torch==1.8.1 \
    tqdm==4.60.0 \
    transformers==3.1.0 \
    typing==3.7.4.3 \
    typing-extensions==3.10.0.0 \
    urllib3==1.26.4 \
    Werkzeug==2.0.1

# the next line will patch moverscore so that it runs on cpu, rather than on your cuda:0 device.
# comment this line if you have access to a GPU
RUN find . -type f -name moverscore_v2.py -exec sed -i 's/cuda:0/cpu/g' {} \;
RUN find . -type f -name moverscore_v2.py -exec sed -i '2 i\import os' {} \;
RUN find . -type f -name moverscore_v2.py -exec sed -i "s/model_name = 'distilbert-base-uncased'/model_name = os.environ.get('MOVERSCORE_MODEL', 'distilbert-base-uncased')/g" {} \;


RUN python3 -c "import nltk; nltk.download('punkt');"
RUN python3 -c "import os; os.environ['MOVERSCORE_MODEL'] = 'distilbert-base-multilingual-cased' ; import moverscore_v2"
--------------------------------------------------------------------------------
/rankings/README.md:
--------------------------------------------------------------------------------
# What is in this directory?

This subdirectory contains three subdirectories.
First is `submission_scores`, which lists the submissions we received and the scores attributed to them by the scoring program.
Second is `submission_ranks`, which converts submission scores into ranks (i.e., how many submissions fared better than or equal to this one?).
Last is `final_rankings`, which lists the best rank per user, the average of these best ranks, and a nominal ranking per user, as listed below.

We also include the python scripts we used to convert raw submission scores into official rankings.

# Official rankings

Below are the official rankings for the SemEval 2022 CODWOE Shared task.

### Definition Modeling track

Below are the results for the Definition Modeling track.
| user / team   | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
|---------------|--------:|--------:|--------:|--------:|--------:
| Locchi        |       8 |       6 |         |       7 |
| WENGSYX       |       9 |       7 |       6 |       6 |       6
| cunliang.kong |       3 |       2 |       3 |   **1** |       2
| IRB-NLP       |       2 |   **1** |   **1** |       5 |       5
| emukans       |       5 |       4 |       4 |       4 |       3
| guntis        |       6 |         |         |         |
| lukechan1231  |       7 |       5 |       5 |       3 |       4
| pzchen        |       4 |       3 |       2 |       2 |   **1**
| talent404     |   **1** |         |         |         |

### Reverse Dictionary track

Below are the results for the Reverse Dictionary track.
There are separate rankings, based on which targets participants have submitted.

#### A. SGNS targets

| user / team      | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
|------------------|--------:|--------:|--------:|--------:|--------:
| Locchi           |       4 |         |         |       4 |
| Nihed_Bendahman_ |       5 |       5 |       4 |       6 |       4
| WENGSYX          |   **1** |       2 |       2 |       3 |   **1**
| MMG              |         |       3 |         |         |
| chlrbgus321      |     N/A |         |         |         |
| IRB-NLP          |       3 |   **1** |   **1** |   **1** |       2
| pzchen           |       2 |       4 |       3 |       2 |       3
| the0ne           |       7 |         |         |         |
| tthhanh          |       8 |       7 |       6 |       7 |       6
| zhwa3087         |       6 |       6 |       5 |       5 |       5

#### B. ELECTRA targets

| user / team      | Rank EN | Rank FR | Rank RU
|------------------|--------:|--------:|--------:
| Locchi           |       3 |         |
| Nihed_Bendahman_ |       2 |       2 |       4
| WENGSYX          |       4 |       4 |       2
| IRB-NLP          |       5 |       3 |       3
| pzchen           |   **1** |   **1** |   **1**
| the0ne           |       6 |         |


#### C. Char-based targets

| user / team      | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
|------------------|--------:|--------:|--------:|--------:|--------:
| Locchi           |   **1** |         |         |       4 |
| Nihed_Bendahman_ |       2 |       2 |       2 |       3 |       4
| WENGSYX          |       7 |       5 |       5 |       6 |       5
| IRB-NLP          |       4 |       3 |       4 |       2 |       2
| pzchen           |       3 |   **1** |   **1** |   **1** |   **1**
| the0ne           |       5 |         |         |         |
| zhwa3087         |       6 |       4 |       3 |       5 |       3


# How were rankings computed?

See the python scripts.

We start by converting scalar scores into ranked scores: i.e., instead of considering the absolute value obtained by a submission, we count how many submissions fared better than or equal to the current one.
This is done so as to neutralize the fact that some metrics have to be maximized (e.g., cosine), whereas others ought to be minimized (e.g., MSE), and that all metrics have different maxima and minima (e.g., MSE is defined from 0 to infinity, but cosine is defined from -1 to +1).

We then take the best rank per user and average it across the three metrics for each target, namely:
- MSE, cosine and rank-cosine for each target architecture in the Reverse Dictionary track
- S-BLEU, L-BLEU and MoverScore for the Definition Modeling track

Finally, we manually converted the averages per target into nominal rankings.
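For concreteness, here is a condensed, self-contained sketch of that
score-to-rank conversion on toy values. The column name and scores are made up;
the actual scripts apply this per language and per metric, flipping the
comparison to `v <= cell` for metrics that are minimized, such as MSE.

```python
import pandas as pd

# Toy scores for a single metric that should be maximized (e.g., cosine).
scores = pd.Series([0.91, 0.85, None, 0.91, 0.10], name="EN cos")
sorted_vals = sorted(scores.dropna(), reverse=True)

def float_to_rank(cell):
    # Rank = number of submissions that fared better than or equal to this one.
    if pd.isna(cell):
        return cell
    return sum(v >= cell for v in sorted_vals)

print(scores.apply(float_to_rank).tolist())
# [2.0, 3.0, nan, 2.0, 4.0]: ties share a rank, missing scores stay missing.
```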
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
# Dataset access

**The datasets are available at the following page: [https://codwoe.atilf.fr/](https://codwoe.atilf.fr/).**

# Dataset structure

This document details the structure of the JSON dataset files we provide. More information is available on the competition website: [link](https://competitions.codalab.org/competitions/34022#participate-get_data).


## Brief Overview

As an overview, the expected usage of the datasets is as follows:
+ In the Definition Modeling track, we expect participants to use the embeddings ("char", "sgns", "electra") to generate the associated definition ("gloss").
+ In the Reverse Dictionary track, we expect participants to use the definition ("gloss") to generate any of the associated embeddings ("char", "sgns", "electra").


## Dataset files structure

Each dataset file corresponds to a data split (trial/train/dev/test) for one of the languages.

Dataset files are in the JSON format. A dataset file contains a list of examples. Each example is a JSON dictionary, containing the following keys:
+ "id",
+ "gloss"
+ "sgns"
+ "char"

The English, French and Russian datasets also contain an "electra" key.

As a concrete instance, here is an example from the English training dataset:
```json
{
    "id": "en.train.2",
    "gloss": "A vocal genre in Hindustani classical music",
    "sgns": [
        -0.0602365807,
        ...
    ],
    "char": [
        -0.3631578386,
        ...
    ],
    "electra": [
        -1.3904430866,
        ...
    ]
},
```

## Description of contents

The value associated to "id" tracks the language, data split and unique identifier for this example.

The value associated to the "gloss" key is a definition, as you would find in a classical dictionary. It is to be used either as the target in the Definition Modeling track, or as the source in the Reverse Dictionary track.

All other keys ("char", "sgns", "electra") correspond to embeddings, and the associated values are arrays of floats representing their components. They all can serve as targets for the Reverse Dictionary track.
+ "char" corresponds to character-based embeddings, computed using an auto-encoder on the spelling of a word.
+ "sgns" corresponds to skip-gram with negative sampling embeddings (a.k.a. word2vec).
+ "electra" corresponds to Transformer-based contextualized embeddings.


## Using the dataset files

Given that the data is in JSON format, it is straightforward to load it in python:

```python
import json
with open(PATH_TO_DATASET, "r") as file_handler:
    dataset = json.load(file_handler)
```

A more complete example for pytorch is available in the git repository (see here: [link](https://git.atilf.fr/tmickus/codwoe/-/blob/master/code/data.py#L18)).

## Expected output format

During the evaluation phase, we will expect submissions to reconstruct the same JSON format.

The test JSON files for input will be separate for each track. They will contain the "id" key, and either the "gloss" key (in the reverse dictionary track) or the embedding keys ("char" and "sgns", plus "electra" in EN/FR/RU, in the definition modeling track).

In the definition modeling track, participants should construct JSON files that contain at least the two following keys:
+ the original "id"
+ their generated "gloss"

In the reverse dictionary track, participants should construct JSON files that contain at least the two following keys:
+ the original "id",
+ any of the valid embeddings ("char", "sgns", or "electra" in EN/FR/RU)

Other keys can be added; a minimal sketch follows.
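As a hypothetical illustration, a definition modeling submission could be
assembled as follows. The ids and glosses below are placeholders: in practice,
the "id" values are copied over from the test file and the "gloss" values come
from your system's output.

```python
import json

# Placeholder predictions for the definition modeling track; a reverse
# dictionary submission would instead pair each "id" with one of the valid
# embedding keys ("sgns", "char" or "electra").
submission = [
    {"id": "en.test.defmod.1", "gloss": "a first generated definition"},
    {"id": "en.test.defmod.2", "gloss": "a second generated definition"},
]

with open("defmod_submission.json", "w") as ostr:
    json.dump(submission, ostr)
```

The resulting file can then be verified with the format checker shipped in
this repository, e.g. `python3 code/codwoe_entrypoint.py check-format
defmod_submission.json`.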
More details concerning the evaluation procedure are available here: [link](https://competitions.codalab.org/competitions/34022#learn_the_details-evaluation).

## License Information

The complete datasets, embedding architectures and embedding models will be made publicly available after the evaluation phase under a CC-BY-SA license. Please link to the competition website page ([link](https://competitions.codalab.org/competitions/34022)) and cite our upcoming task description paper if you use these datasets in your own work.

Dictionary data has been extracted from dumps provided by [Sérasset (2014)](http://kaiko.getalp.org/about-dbnary/). Embeddings were trained specifically for this shared task; all details will be made available in the task description paper.
--------------------------------------------------------------------------------
/codalab/competition/terms_and_conditions.html:
--------------------------------------------------------------------------------

Terms and Conditions

Participants should generally adopt a spirit of good sportsmanship and
avoid any unfair or otherwise unconscionable conduct. We provide the following
terms and conditions to clearly delineate the guidelines to which the
participants are expected to adhere. Organizers reserve the right to amend
the following terms in any way, in which case modifications will be advertised
through the shared task mailing list and the CodaLab forums.
Participants may contact the organizers if any of the following terms
raises their concern.

Participation in the competition: Any interested person may freely
participate in the competition. By participating in the competition, you agree
to the terms and conditions in their entirety, without amendment or provision.
By participating in the competition, you understand and agree that your scores
and submissions will be made public.
Scores and submissions are understood as any direct or indirect contributions
to this site or the shared task organizers, such as, but not limited to:
results of automatic scoring programs; manual, qualitative and quantitative
assessments of the data submitted; etc.
Participants may create teams. Participants may not be part of more than one
team. Teams and participants not belonging to any team must create exactly one
account on the CodaLab competition. Team composition may not be changed once
the evaluation phase starts.

Scoring of submissions: Organizers are under no obligation to release
scores. Official scores may be withheld, amended or removed if organizers
judge the submission incomplete, erroneous, deceptive, or violating the letter
or spirit of the competition's rules. Inclusion of a submission's scores is
not an endorsement of a team or individual's submission, system, or science.
Up to 50 submissions will be allowed during the evaluation phase. Scores will
not be visible on the leaderboards until the evaluation phase is over.
Submission files will be grouped according to the track, the language, and, in
the case of the reverse dictionary track, the embedding architecture targeted;
the last submission file per group will be understood as the team's or
participant's definitive submission and ranked as such in the task description
paper.

Data usage: The provided data should be used responsibly and ethically.
Do not attempt to misuse it in any way, including, but not limited to,
reconstructing test sets, any non-scientific use of the data, or any other
unconscionable usage of the data.
During the course of the shared task, participants are not allowed to use
any external data. This is to ensure that results are immediately
comparable. Participants will be allowed to use external data once the
evaluation phase is over for system review. All data will be released at the
end of the evaluation phase.

Submission of system description papers: Participants having made at
least one submission during the evaluation phase will be invited to submit a
paper describing their system. As a requirement, a link to the code of the
systems being described will be made available to the organizers or the public
at large. Participants submitting a system description paper will also be
asked to review papers submitted by their peers in a single-blind process.
We further encourage system description papers to include a manual analysis
of their systems' results and productions. The presence and quality of such
an analysis will be assessed during the review process. The task description
paper will also devote a significant amount of space to highlighting
outstanding manual evaluations conducted by participants.

Collection of system productions: Participants having made at least one
submission during the evaluation phase will be invited to submit their
systems' outputs to a dataset of system productions. The purpose of this
collection of system productions will solely be to propose them as a shared
task for upcoming text generation evaluation campaigns.

Funding Acknowledgments: This shared task was supported by a public
grant overseen by the French National Research Agency (ANR) as part of the
"Investissements d'Avenir" program: Idex Lorraine Université
d'Excellence (reference: ANR-15-IDEX-0004).
Future sponsors, if any, will be appended to this section.
--------------------------------------------------------------------------------
/code/check_output.py:
--------------------------------------------------------------------------------
import argparse
import collections
import json
import logging
import pathlib
import sys

logger = logging.getLogger(pathlib.Path(__file__).name)
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(
    logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
)
logger.addHandler(handler)


def get_parser(
    parser=argparse.ArgumentParser(
        description="Verify the output format of a submission"
    ),
):
    parser.add_argument("submission_file", type=pathlib.Path, help="file to check")
    return parser


def main(filename):
    try:
        with open(filename, "r") as istr:
            items = json.load(istr)
    except:
        raise ValueError(f'File "{filename}": could not open, submission will fail.')
    else:
        # expected_keys = {"id", "gloss"}
        for item in items:
            # keys_not_found = expected_keys - set(item.keys())
            if "id" not in item:
                raise ValueError(
                    f'File "{filename}": one or more items do not contain an id, submission will fail.'
                )
        ids = sorted([item["id"] for item in items])
        ids = [i.split(".") for i in ids]
        langs = {i[0] for i in ids}
        if len(langs) != 1:
            raise ValueError(
                f'File "{filename}": ids do not identify a unique language, submission will fail.'
            )
        tracks = {i[-2] for i in ids}
        if len(tracks) != 1:
            raise ValueError(
                f'File "{filename}": ids do not identify a unique track, submission will fail.'
            )
        track = next(iter(tracks))
        if track not in ("revdict", "defmod"):
            raise ValueError(
                f'File "{filename}": unknown track identified {track}, submission will fail.'
            )
        lang = next(iter(langs))
        if lang not in ("en", "es", "fr", "it", "ru"):
            raise ValueError(
                f'File "{filename}": unknown language {lang}, submission will fail.'
            )
        serials = list(sorted({int(i[-1]) for i in ids}))
        if serials != list(range(1, len(ids) + 1)):
            raise ValueError(
                f'File "{filename}": ids do not identify all items in dataset, submission will fail.'
            )
        if track == "revdict":
            vec_archs = set(items[0].keys()) - {
                "id",
                "gloss",
                "word",
                "pos",
                "concrete",
                "example",
                "f_rnk",
                "counts",
                "polysemous",
            }
            if len(vec_archs) == 0:
                raise ValueError(
                    f'File "{filename}": no vector architecture was found, revdict submission will fail.'
                )
            for item in items:
                if not all(v in item for v in vec_archs):
                    raise ValueError(
                        f'File "{filename}": some items do not contain all the expected vectors, revdict submission will fail.'
                    )
            if len(vec_archs - {"sgns", "char", "electra"}):
                raise ValueError(
                    f'File "{filename}": unknown vector architecture(s), revdict submission will fail.'
                )
        if track == "defmod" and any("gloss" not in i for i in items):
            raise ValueError(
                f'File "{filename}": some items do not contain a gloss, defmod submission will fail.'
            )

    ok_message = (
        f'File "{filename}": no problems were identified.\n'
        + f"The submission will be understood as follows:\n"
        + f"\tSubmission on track {track} for language {lang}, {len(ids)} predictions.\n"
    )
    if track == "revdict":
        vec_archs = tuple(sorted(vec_archs))
        ok_message += (
            f'\tSubmission predicts these embeddings: {", ".join(vec_archs)}.'
        )
    else:
        vec_archs = None
    logger.debug(ok_message)
    CheckSummary = collections.namedtuple(
        "CheckSummary", ["filename", "track", "lang", "vec_archs"]
    )
    return CheckSummary(filename, track, lang, vec_archs)


if __name__ == "__main__":
    main(get_parser().parse_args().submission_file)
--------------------------------------------------------------------------------
/baseline_archs/code/check_output.py:
--------------------------------------------------------------------------------
import argparse
import collections
import json
import logging
import pathlib
import sys

logger = logging.getLogger(pathlib.Path(__file__).name)
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(
    logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
)
logger.addHandler(handler)


def get_parser(
    parser=argparse.ArgumentParser(
        description="Verify the output format of a submission"
    ),
):
    parser.add_argument("submission_file", type=pathlib.Path, help="file to check")
    return parser


def main(filename):
    try:
        with open(filename, "r") as istr:
            items = json.load(istr)
    except:
        raise ValueError(f'File "{filename}": could not open, submission will fail.')
    else:
        # expected_keys = {"id", "gloss"}
        for item in items:
            # keys_not_found = expected_keys - set(item.keys())
            if "id" not in item:
                raise ValueError(
                    f'File "{filename}": one or more items do not contain an id, submission will fail.'
                )
        ids = sorted([item["id"] for item in items])
        ids = [i.split(".") for i in ids]
        langs = {i[0] for i in ids}
        if len(langs) != 1:
            raise ValueError(
                f'File "{filename}": ids do not identify a unique language, submission will fail.'
            )
        tracks = {i[-2] for i in ids}
        if len(tracks) != 1:
            raise ValueError(
                f'File "{filename}": ids do not identify a unique track, submission will fail.'
            )
        track = next(iter(tracks))
        if track not in ("revdict", "defmod"):
            raise ValueError(
                f'File "{filename}": unknown track identified {track}, submission will fail.'
            )
        lang = next(iter(langs))
        if lang not in ("en", "es", "fr", "it", "ru"):
            raise ValueError(
                f'File "{filename}": unknown language {lang}, submission will fail.'
            )
        serials = list(sorted({int(i[-1]) for i in ids}))
        if serials != list(range(1, len(ids) + 1)):
            raise ValueError(
                f'File "{filename}": ids do not identify all items in dataset, submission will fail.'
            )
        if track == "revdict":
            vec_archs = set(items[0].keys()) - {
                "id",
                "gloss",
                "word",
                "pos",
                "concrete",
                "example",
                "f_rnk",
                "counts",
                "polysemous",
            }
            if len(vec_archs) == 0:
                raise ValueError(
                    f'File "{filename}": no vector architecture was found, revdict submission will fail.'
                )
            for item in items:
                if not all(v in item for v in vec_archs):
                    raise ValueError(
                        f'File "{filename}": some items do not contain all the expected vectors, revdict submission will fail.'
                    )
            if len(vec_archs - {"sgns", "char", "electra"}):
                raise ValueError(
                    f'File "{filename}": unknown vector architecture(s), revdict submission will fail.'
                )
        if track == "defmod" and any("gloss" not in i for i in items):
            raise ValueError(
                f'File "{filename}": some items do not contain a gloss, defmod submission will fail.'
            )

    ok_message = (
        f'File "{filename}": no problems were identified.\n'
        + f"The submission will be understood as follows:\n"
        + f"\tSubmission on track {track} for language {lang}, {len(ids)} predictions.\n"
    )
    if track == "revdict":
        vec_archs = tuple(sorted(vec_archs))
        ok_message += (
            f'\tSubmission predicts these embeddings: {", ".join(vec_archs)}.'
        )
    else:
        vec_archs = None
    logger.debug(ok_message)
    CheckSummary = collections.namedtuple(
        "CheckSummary", ["filename", "track", "lang", "vec_archs"]
    )
    return CheckSummary(filename, track, lang, vec_archs)


if __name__ == "__main__":
    main(get_parser().parse_args().submission_file)
--------------------------------------------------------------------------------
/code/models.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

import data


class PositionalEncoding(nn.Module):
    """From PyTorch"""

    def __init__(self, d_model, dropout=0.1, max_len=4096):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer("pe", pe)

    def forward(self, x):
        x = x + self.pe[: x.size(0)]
        return self.dropout(x)


class DefmodModel(nn.Module):
    """A transformer architecture for Definition Modeling."""

    def __init__(
        self, vocab, d_model=256, n_head=4, n_layers=4, dropout=0.3, maxlen=256
    ):
        super(DefmodModel, self).__init__()
        self.d_model = d_model
        self.padding_idx = vocab[data.PAD]
        self.eos_idx = vocab[data.EOS]
        self.maxlen = maxlen

        self.embedding = nn.Embedding(len(vocab), d_model, padding_idx=self.padding_idx)
        self.positional_encoding = PositionalEncoding(
            d_model, dropout=dropout, max_len=maxlen
        )
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=n_head, dropout=dropout, dim_feedforward=d_model * 2
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=n_layers
        )
        self.v_proj = nn.Linear(d_model, len(vocab))
        # initializing weights
        for name, param in self.named_parameters():
            if param.dim() > 1:
                nn.init.xavier_uniform_(param)
            elif "bias" in name:
                nn.init.zeros_(param)
            else:  # gain parameters of the layer norm
                nn.init.ones_(param)

    def generate_square_subsequent_mask(self, sz):
        """From PyTorch"""
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = (
            mask.float()
            .masked_fill(mask == 0, float("-inf"))
            .masked_fill(mask == 1, float(0.0))
        )
        return mask

    def forward(self, vector, input_sequence=None):
        device = next(self.parameters()).device
        embs = self.embedding(input_sequence)
        seq = torch.cat([vector.unsqueeze(0), embs], dim=0)
        src = self.positional_encoding(seq)
        src_mask = self.generate_square_subsequent_mask(src.size(0)).to(device)
        src_key_padding_mask = torch.cat(
            [
                torch.tensor([[False] * input_sequence.size(1)]).to(device),
                (input_sequence == self.padding_idx),
            ],
            dim=0,
        ).t()
        transformer_output = self.transformer_encoder(
            src, mask=src_mask, src_key_padding_mask=src_key_padding_mask
        )
        v_dist = self.v_proj(transformer_output)
        return v_dist

    @staticmethod
    def load(file):
        return torch.load(file)

    def save(self, file):
        torch.save(self, file)

    def pred(self, vector):
        # greedy decoding: repeatedly feed back the argmax symbol until EOS
        generated_symbols = []
        device = next(self.parameters()).device
        batch_size = vector.size(0)
        src = vector.unsqueeze(0)
        has_stopped = torch.tensor([False] * batch_size).to(device)
        src_key_padding_mask = torch.tensor([[False] * batch_size]).to(device)
        for step_idx in range(self.maxlen):
            src_mask = self.generate_square_subsequent_mask(src.size(0)).to(device)
            src_pe = self.positional_encoding(src)
            transformer_output = self.transformer_encoder(
                src_pe, mask=src_mask, src_key_padding_mask=src_key_padding_mask.t()
            )[-1]
            v_dist = self.v_proj(transformer_output)
            new_symbol = v_dist.argmax(-1)
            new_symbol = new_symbol.masked_fill(has_stopped, self.padding_idx)
            generated_symbols.append(new_symbol)
            src_key_padding_mask = torch.cat(
                [src_key_padding_mask, has_stopped.unsqueeze(0)], dim=0
            )
            has_stopped = has_stopped | (new_symbol == self.eos_idx)
            src = torch.cat([src, self.embedding(new_symbol).unsqueeze(0)], dim=0)
            if has_stopped.all():
                break
        output_sequence = torch.stack(generated_symbols, dim=0)
        return output_sequence


class RevdictModel(nn.Module):
    """A transformer architecture for the Reverse Dictionary track."""

    def __init__(
        self, vocab, d_model=256, n_head=4, n_layers=4, dropout=0.3, maxlen=512
    ):
        super(RevdictModel, self).__init__()
        self.d_model = d_model
        self.padding_idx = vocab[data.PAD]
        self.eos_idx = vocab[data.EOS]
        self.maxlen = maxlen

        self.embedding = nn.Embedding(len(vocab), d_model, padding_idx=self.padding_idx)
        self.positional_encoding = PositionalEncoding(
            d_model, dropout=dropout, max_len=maxlen
        )
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=n_head, dropout=dropout, dim_feedforward=d_model * 2
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=n_layers
        )
        self.dropout = nn.Dropout(p=dropout)
        self.e_proj = nn.Linear(d_model, d_model)
        for name, param in self.named_parameters():
            if param.dim() > 1:
                nn.init.xavier_uniform_(param)
            elif "bias" in name:
                nn.init.zeros_(param)
            else:  # gain parameters of the layer norm
                nn.init.ones_(param)

    def forward(self, gloss_tensor):
        src_key_padding_mask = gloss_tensor == self.padding_idx
        embs = self.embedding(gloss_tensor)
        src = self.positional_encoding(embs)
        transformer_output = self.dropout(
            self.transformer_encoder(src, src_key_padding_mask=src_key_padding_mask.t())
        )
        summed_embs = transformer_output.masked_fill(
            src_key_padding_mask.unsqueeze(-1), 0
        ).sum(dim=0)
        return self.e_proj(F.relu(summed_embs))

    @staticmethod
    def load(file):
        return torch.load(file)

    def save(self, file):
        torch.save(self, file)
--------------------------------------------------------------------------------
/codalab/competition/overview.html:
--------------------------------------------------------------------------------

2 | CODWOE: COmparing Dictionaries and WOrd Embeddings 3 |

4 |

5 | The CODWOE shared task invites you to compare two types of semantic 6 | descriptions: dictionary glosses and word embedding representations. Are these 7 | two types of representation equivalent? Can we generate one from the other? To 8 | study this question, we propose two subtracks: a definition modeling 9 | track (Noraset et 10 | al., 2017), where participants have to generate glosses from 11 | vectors, and a reverse dictionary track 12 | (Hill et al., 2016), where 13 | participants have to generate vectors from glosses. 14 |

15 |

16 | These two tracks display a number of interesting characteristics. Definition 17 | modeling is a vector-to-sequence task, whereas the reverse dictionary task is 18 | a sequence-to-vector task—and you know that kind of thing gets NLP people 19 | swearing out loud. These tasks are also useful for explainable AI, since they 20 | involve converting human-readable data into machine-readable data and back. 21 |

22 |

23 | Dictionaries contain definitions, such as 24 | Merriam 25 | Webster's: 26 |

27 |

28 | cod: any of various bottom-dwelling fishes (family Gadidae, the cod 29 | family) that usually occur in cold marine waters and often have barbels and 30 | three dorsal fins 31 |
32 |

33 | The task of definition modeling consists in using the vector representation of 34 | co⃗d to produce the associated gloss, "any of various 35 | bottom-dwelling fishes (family Gadidae, the cod family) that usually occur in 36 | cold marine waters and often have barbels and three dorsal fins". The 37 | reverse dictionary task is the mathematical inverse: reconstruct an embedding 38 | co⃗d from the corresponding gloss. 39 |
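In other words, the two tracks implement inverse mappings. As a schematic sketch (not the official baseline; the names and dimensions below are hypothetical placeholders), their inputs and outputs look like this:

```python
import torch

# Hypothetical dimensions, for illustration only.
d_model, vocab_size, seq_len = 256, 10_000, 32

# Definition modeling: one embedding in, a gloss (token sequence) out.
defmod_in = torch.randn(1, d_model)                 # vector for "cod"
defmod_out = torch.randint(vocab_size, (seq_len,))  # token ids of the gloss

# Reverse dictionary: a gloss in, one embedding out.
revdict_in = torch.randint(vocab_size, (seq_len,))  # token ids of the gloss
revdict_out = torch.randn(1, d_model)               # reconstructed vector
```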

40 |

41 | Beyond their usefulness for explainable AI, these tasks also have a 42 | theoretical significance: both glosses and word embeddings are 43 | representations of meaning, and converting one into the other therefore 44 | amounts to translating between two distinct, non-formal semantic 45 | representations. From a practical point of view, the ability to infer 46 | word embeddings from dictionary resources, or dictionaries from large 47 | unannotated corpora, would prove a boon for many under-resourced 48 | languages. 49 | 50 |

51 |

52 | Dive right in and get started! 53 |

54 |

55 | The data can be retrieved from 56 | our git 57 | repository. See the related codalab 58 | page for more details as well. 59 |

60 |

61 | To help participants get started, we provide a basic architecture for both 62 | tracks, a submission format checker, and the scoring script. All of this is 63 | available in our public git 64 | repository. 65 |
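As a quick illustration, the baselines can be driven through their own argparse interfaces. Here is a minimal sketch for the reverse dictionary baseline; the data paths below are hypothetical placeholders:

```python
# A minimal sketch: train the revdict baseline via code/revdict.py's parser.
# Run from the code/ directory of the repository; file paths are hypothetical.
import revdict

args = revdict.get_parser().parse_args([
    "--do_train",
    "--train_file", "data/en.train.json",
    "--dev_file", "data/en.dev.json",
    "--target_arch", "sgns",
])
revdict.main(args)
```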

66 |

67 | Keep in mind that we do not allow external data! The point is to keep 68 | results linguistically significant and easily comparable. For all details on 69 | how we will evaluate submissions, check the relevant 70 | codalab page. 71 |

72 | 73 |

74 | What we are fishing for with this shared task 75 |

76 |

77 | Rather than focusing strictly on getting the highest scores on a benchmark, we 78 | encourage participants to approach this shared task as a collaborative 79 | research question: how should we compare two vastly different types of 80 | semantic representations such as dictionaries and word embeddings? What 81 | caveats are there? In fact, we already have a few questions we look forward to 82 | studying at the end of this shared task: 83 |

84 | 102 |

103 | These are but a few questions that we are interested in—do come up with 104 | your own to test during this shared task! To encourage participants to adopt 105 | this mindset, here are a few key elements of this shared task: 106 |

107 | 126 |

127 | As is usual for SemEval tasks, we will release all data at the end of the 128 | shared task. Depending on participants’ consent, we also plan to collect the 129 | productions of all models and reuse them in a future evaluation campaign. 130 |

131 | 132 |

133 | Shared task timeline (this too shall bass) 134 |

135 |

136 | Here are the key dates participants should keep in mind. Do note that these 137 | are subject to change. 138 |

155 | Camera-ready due date and SemEval 2022 workshops will be announced at a later 156 | date. 157 |

158 | 159 |

160 | You have an issue? You need kelp? Get in touch! 161 |

162 |

163 | There’s a Google group for all prospective participants: check it out at 164 | 165 | semeval2022-dictionaries-and-word-embeddings@googlegroups.com. You can 166 | also reach the organizers directly at 167 | tmickus@atilf.fr; make sure to mention the SemEval task in the email 168 | subject. 169 |

170 | -------------------------------------------------------------------------------- /codalab/competition/evaluation.html: -------------------------------------------------------------------------------- 1 |

Evaluation Criteria

2 | 3 |

4 | The evaluation script is available on our 5 | 6 | git repository for reference. Note that the complete dataset is 7 | required to run all the metrics. Metrics requiring the full dataset are 8 | indicated as such in the list below. The complete dataset will be made 9 | available at the end of the competition. 10 |
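As a rough sketch of how the scorer is driven (the directory names below are hypothetical placeholders), code/score.py exposes an argparse interface:

```python
# Score a directory of submission JSON files against local reference files.
# Run from the code/ directory of the repository; paths are hypothetical.
import score

args = score.get_parser().parse_args([
    "my_submissions/",
    "--reference_files_dir", "data/",
    "--output_file", "scores.txt",
])
score.main(args)
```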

11 |

12 | Participants may not use any external resource. This requirement is to 13 | ensure that all submissions are easily comparable. We will ask participants 14 | planning to submit a system description paper to forward a link to their code. 15 |

16 |

17 | Participants will also be invited to contribute their systems' outputs to a 18 | dataset of system productions. The purpose of this collection of system 19 | productions is to propose them as a shared task for upcoming text generation 20 | evaluation campaigns. 21 |

22 | 23 |

Metrics for the definition modeling track

24 |

25 | Definition modeling submissions are evaluated using three metrics: 26 |

27 | - MoverScore (MvSc.): a semantic similarity score computed between the submitted gloss and the reference gloss, using contextualized embeddings.
- Sense-level BLEU (S-BLEU): the BLEU score between the submitted gloss and the reference gloss for that specific sense.
- Lemma-level BLEU (L-BLEU): the highest BLEU score obtained between the submitted gloss and any reference gloss attested for the same word and part of speech. 54 |

55 | Scoring a definition modeling submission using MoverScore on CPU takes some 56 | time (15 min or more). Results may not be available immediately upon 57 | submission. 58 |

59 |

60 | Scores for distinct languages have different entries in the leaderboards, and 61 | will correspond to distinct official rankings in the task paper. 62 |

63 |

64 | Submissions to the definition modeling track must consist of a ZIP archive 65 | containing one or more JSON files. These JSON files must contain a list of 66 | JSON objects, each of which must at least contain two keys: "id" and "gloss". 67 | The id key is used to match submissions with references. The gloss key should 68 | map to the string production to be evaluated. See our 69 | 70 | git repository for an example architecture that can output the correct 71 | JSON format. 72 |
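As an illustration, a minimal (and entirely hypothetical) definition modeling submission file could be produced as follows; the ids and glosses are placeholders:

```python
import json

# Hypothetical ids and glosses, for illustration only.
submission = [
    {"id": "en.test.defmod.0", "gloss": "a large fish of cold marine waters"},
    {"id": "en.test.defmod.1", "gloss": "to move rapidly through water"},
]
with open("en.defmod.json", "w") as ostr:
    json.dump(submission, ostr)
```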

73 |

74 | To have your outputs scored, create a ZIP archive containing all the files you 75 | wish to submit, and upload it on CodaLab during the Evaluation phase. You can 76 | submit files for both tracks (definition modeling and reverse dictionary) at 77 | once in a single ZIP archive. Make sure that setups are unique: do not include 78 | two JSON files containing predictions for the same pair of track and language. 79 |
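A sketch of the corresponding packaging step (the file names are hypothetical placeholders):

```python
import zipfile

# Bundle per-track, per-language prediction files into one archive for upload.
with zipfile.ZipFile("submission.zip", "w") as archive:
    for name in ["en.defmod.json", "fr.revdict.json"]:  # hypothetical names
        archive.write(name)
```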

80 |

81 | Do not attempt to submit glosses for different languages with a single JSON 82 | submission file. This will fail. Instead, make distinct submission files per 83 | language. 84 |

85 |

86 | We strongly encourage you to check the format of your submission using our 87 | 88 | format checker before submitting to CodaLab. This script will also 89 | summarize how your submission will be understood by the scoring program. 90 |
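Programmatically, the checker can be invoked the same way the scoring program invokes it; the sketch below is inferred from how score.py calls check_output.main, and the file name is hypothetical:

```python
import pathlib
import check_output  # from the code/ directory of the repository

# Returns a summary of how the file will be understood (language, track).
summary = check_output.main(pathlib.Path("en.defmod.json"))
print(summary.lang, summary.track)
```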

91 | 92 |

Metrics for the reverse dictionary track

93 |

94 | Reverse dictionary submissions are evaluated using three metrics: 95 |

96 | - Mean squared error (MSE) between the submitted and reference embeddings.
- Cosine similarity between the submitted and reference embeddings.
- A cosine-based ranking score: the average rank of the true reference among all reference embeddings, when references are sorted by cosine similarity to the submitted vector, normalized by the number of references (lower is better). 112 |

113 | Scores for distinct embeddings and languages have different entries in the 114 | leaderboards, and will correspond to distinct official rankings in the task 115 | paper. 116 |

117 |

118 | Submissions to the reverse dictionary track must consist of a ZIP archive 119 | containing one or more JSON files. These JSON files must contain a list of 120 | JSON objects, each of which must at least contain two keys: "id" and one among 121 | "sgns", "char" or "electra", identifying which architecture your submission 122 | tries to reconstruct. The "id" key is used to match submissions with 123 | references. The other key(s) should map to the vector reconstruction to be 124 | evaluated, as a list of float components. See our 125 | 126 | git repository for an example architecture that can output the correct 127 | JSON format. 128 |
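For instance, a minimal (hypothetical) reverse dictionary submission grouping reconstructions for two architectures might be written as follows; the ids are placeholders, and the toy 4-dimensional vectors would in practice have to match the dimensionality of the target embeddings:

```python
import json

# Hypothetical ids and toy vectors, for illustration only.
submission = [
    {"id": "en.test.revdict.0",
     "sgns": [0.1, -0.2, 0.3, 0.0],
     "char": [0.5, 0.1, -0.3, 0.2]},
    {"id": "en.test.revdict.1",
     "sgns": [0.0, 0.4, -0.1, 0.2],
     "char": [-0.2, 0.3, 0.1, 0.0]},
]
with open("en.revdict.json", "w") as ostr:
    json.dump(submission, ostr)
```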

129 |

130 | To have your outputs scored, create a ZIP archive containing all the files you 131 | wish to submit, and upload it on CodaLab during the Evaluation phase. You can 132 | submit files for both tracks (reverse dictionary and definition modeling) at 133 | once in a single ZIP archive. Make sure that setups are unique: do not include 134 | two JSON files containing predictions for the same configuration of track, 135 | language and embedding architecture. 136 |

137 |

138 | Do not attempt to submit embeddings for different languages in a single JSON 139 | submission. This will fail. Instead, make distinct submission files per 140 | language. You may however group reconstructions for multiple architectures in 141 | a single submission file. 142 |

143 |

144 | We strongly encourage you to check the format of your submission using our 145 | 146 | format checker before submitting to CodaLab. This script will also 147 | summarize how your submission will be understood by the scoring program. 148 |

149 | 150 |

Manual evaluations

151 |

152 | We very strongly encourage participants to make use of the trial dataset for 153 | running manual evaluations of their systems' productions. The presence of a 154 | manual evaluation in system descriptions will be taken into account during the 155 | reviewing process. 156 |

157 | -------------------------------------------------------------------------------- /code/data.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import count 3 | import json 4 | import random 5 | 6 | import torch 7 | from torch.nn.utils.rnn import pad_sequence 8 | from torch.utils.data import DataLoader, Dataset, Sampler 9 | # special tokens used in gloss sequences (any distinct placeholder strings) 10 | BOS = "<bos>" 11 | EOS = "<eos>" 12 | PAD = "<pad>" 13 | UNK = "<unk>" 14 | 15 | SUPPORTED_ARCHS = ("sgns", "char") 16 | 17 | # A dataset is a container object for the actual data 18 | class JSONDataset(Dataset): 19 | """Reads a CODWOE JSON dataset""" 20 | 21 | def __init__(self, file, vocab=None, freeze_vocab=False, maxlen=256): 22 | """ 23 | Construct a torch.utils.data.Dataset compatible with torch data API and 24 | codwoe data. 25 | args: `file` the path to the dataset file 26 | `vocab` a dictionary mapping strings to indices 27 | `freeze_vocab` whether to update vocabulary, or just replace unknown items with OOV token 28 | `maxlen` the maximum number of tokens per gloss 29 | """ 30 | if vocab is None: 31 | self.vocab = defaultdict(count().__next__) 32 | else: 33 | self.vocab = defaultdict(count(len(vocab)).__next__) 34 | self.vocab.update(vocab) 35 | pad, eos, bos, unk = ( 36 | self.vocab[PAD], 37 | self.vocab[EOS], 38 | self.vocab[BOS], 39 | self.vocab[UNK], 40 | ) 41 | if freeze_vocab: 42 | self.vocab = dict(vocab) 43 | with open(file, "r") as istr: 44 | self.items = json.load(istr) 45 | # preparse data 46 | for json_dict in self.items: 47 | # in definition modeling test datasets, gloss targets are absent 48 | if "gloss" in json_dict: 49 | json_dict["gloss_tensor"] = torch.tensor( 50 | [bos] 51 | + [ 52 | self.vocab[word] 53 | if not freeze_vocab 54 | else self.vocab.get(word, unk) 55 | for word in json_dict["gloss"].split() 56 | ] 57 | + [eos] 58 | ) 59 | if maxlen: 60 | json_dict["gloss_tensor"] = json_dict["gloss_tensor"][:maxlen] 61 | # in reverse dictionary test datasets, vector targets are absent 62 | for arch in SUPPORTED_ARCHS: 63 | if arch in json_dict: 64 | json_dict[f"{arch}_tensor"] = torch.tensor(json_dict[arch]) 65 | if "electra" in json_dict: 66 | json_dict["electra_tensor"] = torch.tensor(json_dict["electra"]) 67 | self.has_gloss = "gloss" in self.items[0] 68 | self.has_vecs = SUPPORTED_ARCHS[0] in self.items[0] 69 | self.has_electra = "electra" in self.items[0] 70 | self.itos = sorted(self.vocab, key=lambda w: self.vocab[w]) 71 | 72 | def __len__(self): 73 | return len(self.items) 74 | 75 | def __getitem__(self, index): 76 | return self.items[index] 77 | 78 | # we're adding this method to simplify the code in our predictions of 79 | # glosses 80 | def decode(self, tensor): 81 | """Convert a sequence of indices (possibly batched) to tokens""" 82 | with torch.no_grad(): 83 | if tensor.dim() == 2: 84 | # we have batched tensors of shape [Seq x Batch] 85 | decoded = [] 86 | for tensor_ in tensor.t(): 87 | decoded.append(self.decode(tensor_)) 88 | return decoded 89 | else: 90 | return " ".join( 91 | [self.itos[i.item()] for i in tensor if i != self.vocab[PAD]] 92 | ) 93 | 94 | def save(self, file): 95 | torch.save(self, file) 96 | 97 | @staticmethod 98 | def load(file): 99 | return torch.load(file) 100 | 101 | 102 | # A sampler allows you to define how to select items from your Dataset.
Torch 103 | # provides a number of default Sampler classes 104 | class TokenSampler(Sampler): 105 | """Produce batches with up to `batch_size` tokens in each batch""" 106 | 107 | def __init__( 108 | self, dataset, batch_size=200, size_fn=len, drop_last=False, shuffle=True 109 | ): 110 | """ 111 | args: `dataset` a torch.utils.data.Dataset (iterable style) 112 | `batch_size` the maximum number of tokens in a batch 113 | `size_fn` a callable that yields the number of tokens in a dataset item 114 | `drop_last` if True and the data can't be divided in exactly the right number of batches, drop the last batch 115 | `shuffle` if True, shuffle between every iteration 116 | """ 117 | self.dataset = dataset 118 | self.batch_size = batch_size 119 | self.size_fn = size_fn 120 | self._len = None 121 | self.drop_last = drop_last 122 | self.shuffle = shuffle 123 | 124 | def __iter__(self): 125 | indices = range(len(self.dataset)) 126 | if self.shuffle: 127 | indices = list(indices) 128 | random.shuffle(indices) 129 | i = 0 130 | selected = [] 131 | numel = 0 132 | longest_len = 0 133 | for i in indices: 134 | if numel + self.size_fn(self.dataset[i]) > self.batch_size: 135 | if selected: 136 | yield selected 137 | selected = [] 138 | numel = 0 139 | numel += self.size_fn(self.dataset[i]) 140 | selected.append(i) 141 | if selected and not self.drop_last: 142 | yield selected 143 | 144 | def __len__(self): 145 | if self._len is None: 146 | self._len = ( 147 | sum(self.size_fn(self.dataset[i]) for i in range(len(self.dataset))) 148 | // self.batch_size 149 | ) 150 | return self._len 151 | 152 | 153 | # DataLoaders give access to an iterator over the dataset, using a sampling 154 | # strategy as defined through a Sampler. 155 | def get_dataloader(dataset, batch_size=200, shuffle=True): 156 | """produce dataloader. 157 | args: `dataset` a torch.utils.data.Dataset (iterable style) 158 | `batch_size` the maximum number of tokens in a batch 159 | `shuffle` if True, shuffle between every iteration 160 | """ 161 | # some constants for the closures 162 | has_gloss = dataset.has_gloss 163 | has_vecs = dataset.has_vecs 164 | has_electra = dataset.has_electra 165 | PAD_idx = dataset.vocab[PAD] 166 | 167 | # the collate function has to convert a list of dataset items into a batch 168 | def do_collate(json_dicts): 169 | """collates examples into a dict batch; produces and pads tensors""" 170 | batch = defaultdict(list) 171 | for jdict in json_dicts: 172 | for key in jdict: 173 | batch[key].append(jdict[key]) 174 | if has_gloss: 175 | batch["gloss_tensor"] = pad_sequence( 176 | batch["gloss_tensor"], padding_value=PAD_idx, batch_first=False 177 | ) 178 | if has_vecs: 179 | for arch in SUPPORTED_ARCHS: 180 | batch[f"{arch}_tensor"] = torch.stack(batch[f"{arch}_tensor"]) 181 | if has_electra: 182 | batch["electra_tensor"] = torch.stack(batch["electra_tensor"]) 183 | return dict(batch) 184 | 185 | if dataset.has_gloss: 186 | # we try to keep the amount of gloss tokens roughly constant across all 187 | # batches. 188 | def do_size_item(item): 189 | """retrieve tensor size, so as to batch items per elements""" 190 | return item["gloss_tensor"].numel() 191 | 192 | return DataLoader( 193 | dataset, 194 | collate_fn=do_collate, 195 | batch_sampler=TokenSampler( 196 | dataset, batch_size=batch_size, size_fn=do_size_item, shuffle=shuffle 197 | ), 198 | ) 199 | else: 200 | # there's no gloss, hence no gloss tokens, so we use a default batching 201 | # strategy.
202 | return DataLoader( 203 | dataset, collate_fn=do_collate, batch_size=batch_size, shuffle=shuffle 204 | ) 205 | -------------------------------------------------------------------------------- /code/revdict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import itertools 3 | import json 4 | import logging 5 | import pathlib 6 | import sys 7 | 8 | logger = logging.getLogger(pathlib.Path(__file__).name) 9 | logger.setLevel(logging.DEBUG) 10 | handler = logging.StreamHandler(sys.stdout) 11 | handler.setFormatter( 12 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s") 13 | ) 14 | logger.addHandler(handler) 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | import torch.optim as optim 20 | from torch.utils.tensorboard import SummaryWriter 21 | 22 | import tqdm 23 | 24 | import data 25 | import models 26 | 27 | 28 | def get_parser( 29 | parser=argparse.ArgumentParser( 30 | description="Run a reverse dictionary baseline.\nThe task consists in reconstructing an embedding from the glosses listed in the datasets" 31 | ), 32 | ): 33 | parser.add_argument( 34 | "--do_train", action="store_true", help="whether to train a model from scratch" 35 | ) 36 | parser.add_argument( 37 | "--do_pred", action="store_true", help="whether to produce predictions" 38 | ) 39 | parser.add_argument( 40 | "--train_file", type=pathlib.Path, help="path to the train file" 41 | ) 42 | parser.add_argument("--dev_file", type=pathlib.Path, help="path to the dev file") 43 | parser.add_argument("--test_file", type=pathlib.Path, help="path to the test file") 44 | parser.add_argument( 45 | "--device", 46 | type=torch.device, 47 | default=torch.device("cpu"), 48 | help="device on which to run computations", 49 | ) 50 | parser.add_argument( 51 | "--target_arch", 52 | type=str, 53 | default="sgns", 54 | choices=("sgns", "char", "electra"), 55 | help="embedding architecture to use as target", 56 | ) 57 | parser.add_argument( 58 | "--summary_logdir", 59 | type=pathlib.Path, 60 | default=pathlib.Path("logs") / "revdict-baseline", 61 | help="write logs for future analysis", 62 | ) 63 | parser.add_argument( 64 | "--save_dir", 65 | type=pathlib.Path, 66 | default=pathlib.Path("models") / "revdict-baseline", 67 | help="where to save model & vocab", 68 | ) 69 | parser.add_argument( 70 | "--pred_file", 71 | type=pathlib.Path, 72 | default=pathlib.Path("revdict-baseline-preds.json"), 73 | help="where to save predictions", 74 | ) 75 | return parser 76 | 77 | 78 | def train(args): 79 | assert args.train_file is not None, "Missing dataset for training" 80 | # 1. get data, vocabulary, summary writer 81 | logger.debug("Preloading data") 82 | ## make datasets 83 | train_dataset = data.JSONDataset(args.train_file) 84 | if args.dev_file: 85 | dev_dataset = data.JSONDataset(args.dev_file, vocab=train_dataset.vocab) 86 | ## assert they correspond to the task 87 | assert train_dataset.has_gloss, "Training dataset contains no gloss." 88 | if args.target_arch == "electra": 89 | assert train_dataset.has_electra, "Training dataset contains no vector." 90 | else: 91 | assert train_dataset.has_vecs, "Training dataset contains no vector." 92 | if args.dev_file: 93 | assert dev_dataset.has_gloss, "Development dataset contains no gloss." 94 | if args.target_arch == "electra": 95 | assert dev_dataset.has_electra, "Development dataset contains no vector." 96 | else: 97 | assert dev_dataset.has_vecs, "Development dataset contains no vector."
98 | ## make dataloader 99 | train_dataloader = data.get_dataloader(train_dataset, batch_size=512) 100 | dev_dataloader = data.get_dataloader(dev_dataset, shuffle=False, batch_size=1024) 101 | ## make summary writer 102 | summary_writer = SummaryWriter(args.summary_logdir) 103 | train_step = itertools.count() # to keep track of the training steps for logging 104 | 105 | # 2. construct model 106 | ## Hyperparams 107 | logger.debug("Setting up training environment") 108 | model = models.RevdictModel(dev_dataset.vocab).to(args.device) 109 | model.train() 110 | 111 | # 3. declare optimizer & criterion 112 | ## Hyperparams 113 | EPOCHS, LEARNING_RATE, BETA1, BETA2, WEIGHT_DECAY = 10, 1.0e-4, 0.9, 0.999, 1.0e-6 114 | optimizer = optim.AdamW( 115 | model.parameters(), 116 | lr=LEARNING_RATE, 117 | betas=(BETA1, BETA2), 118 | weight_decay=WEIGHT_DECAY, 119 | ) 120 | criterion = nn.MSELoss() 121 | 122 | vec_tensor_key = f"{args.target_arch}_tensor" 123 | 124 | # 4. train model 125 | for epoch in tqdm.trange(EPOCHS, desc="Epochs"): 126 | ## train loop 127 | pbar = tqdm.tqdm( 128 | desc=f"Train {epoch}", total=len(train_dataset), disable=None, leave=False 129 | ) 130 | for batch in train_dataloader: 131 | optimizer.zero_grad() 132 | gls = batch["gloss_tensor"].to(args.device) 133 | vec = batch[vec_tensor_key].to(args.device) 134 | pred = model(gls) 135 | loss = criterion(pred, vec) 136 | loss.backward() 137 | # keep track of the train loss for this step 138 | next_step = next(train_step) 139 | summary_writer.add_scalar( 140 | "revdict-train/cos", 141 | F.cosine_similarity(pred, vec).mean().item(), 142 | next_step, 143 | ) 144 | summary_writer.add_scalar("revdict-train/mse", loss.item(), next_step) 145 | optimizer.step() 146 | pbar.update(vec.size(0)) 147 | pbar.close() 148 | ## eval loop 149 | if args.dev_file: 150 | model.eval() 151 | with torch.no_grad(): 152 | sum_dev_loss, sum_cosine = 0.0, 0.0 153 | pbar = tqdm.tqdm( 154 | desc=f"Eval {epoch}", 155 | total=len(dev_dataset), 156 | disable=None, 157 | leave=False, 158 | ) 159 | for batch in dev_dataloader: 160 | gls = batch["gloss_tensor"].to(args.device) 161 | vec = batch[vec_tensor_key].to(args.device) 162 | pred = model(gls) 163 | sum_dev_loss += ( 164 | F.mse_loss(pred, vec, reduction="none").mean(1).sum().item() 165 | ) 166 | sum_cosine += F.cosine_similarity(pred, vec).sum().item() 167 | pbar.update(vec.size(0)) 168 | # keep track of the average loss on dev set for this epoch 169 | summary_writer.add_scalar( 170 | "revdict-dev/cos", sum_cosine / len(dev_dataset), epoch 171 | ) 172 | summary_writer.add_scalar( 173 | "revdict-dev/mse", sum_dev_loss / len(dev_dataset), epoch 174 | ) 175 | pbar.close() 176 | model.train() 177 | 178 | # 5. save result 179 | model.save(args.save_dir / "model.pt") 180 | train_dataset.save(args.save_dir / "train_dataset.pt") 181 | dev_dataset.save(args.save_dir / "dev_dataset.pt") 182 | 183 | 184 | def pred(args): 185 | assert args.test_file is not None, "Missing dataset for test" 186 | # 1. 
retrieve vocab, dataset, model 187 | model = models.RevdictModel.load(args.save_dir / "model.pt") 188 | train_vocab = data.JSONDataset.load(args.save_dir / "train_dataset.pt").vocab 189 | test_dataset = data.JSONDataset( 190 | args.test_file, vocab=train_vocab, freeze_vocab=True, maxlen=model.maxlen 191 | ) 192 | test_dataloader = data.get_dataloader(test_dataset, shuffle=False, batch_size=1024) 193 | model.eval() 194 | vec_tensor_key = f"{args.target_arch}_tensor" 195 | assert test_dataset.has_gloss, "File is not usable for the task" 196 | # 2. make predictions 197 | predictions = [] 198 | with torch.no_grad(): 199 | pbar = tqdm.tqdm(desc="Pred.", total=len(test_dataset)) 200 | for batch in test_dataloader: 201 | vecs = model(batch["gloss_tensor"].to(args.device)).cpu() 202 | for id, vec in zip(batch["id"], vecs.unbind()): 203 | predictions.append( 204 | {"id": id, args.target_arch: vec.view(-1).cpu().tolist()} 205 | ) 206 | pbar.update(vecs.size(0)) 207 | pbar.close() 208 | with open(args.pred_file, "w") as ostr: 209 | json.dump(predictions, ostr) 210 | 211 | 212 | def main(args): 213 | if args.do_train: 214 | logger.debug("Performing revdict training") 215 | train(args) 216 | if args.do_pred: 217 | logger.debug("Performing revdict prediction") 218 | pred(args) 219 | 220 | 221 | if __name__ == "__main__": 222 | args = get_parser().parse_args() 223 | main(args) 224 | -------------------------------------------------------------------------------- /code/defmod.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import itertools 3 | import json 4 | import logging 5 | import pathlib 6 | import sys 7 | 8 | logger = logging.getLogger(pathlib.Path(__file__).name) 9 | logger.setLevel(logging.DEBUG) 10 | handler = logging.StreamHandler(sys.stdout) 11 | handler.setFormatter( 12 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s") 13 | ) 14 | logger.addHandler(handler) 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | import torch.optim as optim 20 | from torch.utils.tensorboard import SummaryWriter 21 | 22 | import tqdm 23 | 24 | import data 25 | import models 26 | 27 | 28 | def get_parser( 29 | parser=argparse.ArgumentParser(description="run a definition modeling baseline"), 30 | ): 31 | parser.add_argument( 32 | "--do_train", action="store_true", help="whether to train a model from scratch" 33 | ) 34 | parser.add_argument( 35 | "--do_pred", action="store_true", help="whether to produce predictions" 36 | ) 37 | parser.add_argument( 38 | "--train_file", type=pathlib.Path, help="path to the train file" 39 | ) 40 | parser.add_argument("--dev_file", type=pathlib.Path, help="path to the dev file") 41 | parser.add_argument("--test_file", type=pathlib.Path, help="path to the test file") 42 | parser.add_argument( 43 | "--device", 44 | type=torch.device, 45 | default=torch.device("cpu"), 46 | help="device on which to run computations", 47 | ) 48 | parser.add_argument( 49 | "--source_arch", 50 | type=str, 51 | default="sgns", 52 | choices=("sgns", "char", "electra"), 53 | help="embedding architecture to use as source", 54 | ) 55 | parser.add_argument( 56 | "--summary_logdir", 57 | type=pathlib.Path, 58 | default=pathlib.Path("logs") / "defmod-baseline", 59 | help="write logs for future analysis", 60 | ) 61 | parser.add_argument( 62 | "--save_dir", 63 | type=pathlib.Path, 64 | default=pathlib.Path("models") / "defmod-baseline", 65 | help="where to save model & vocab", 66 | ) 67 | parser.add_argument( 68
| "--pred_file", 69 | type=pathlib.Path, 70 | default=pathlib.Path("defmod-baseline-preds.json"), 71 | help="where to save predictions", 72 | ) 73 | return parser 74 | 75 | 76 | def train(args): 77 | assert args.train_file is not None, "Missing dataset for training" 78 | # 1. get data, vocabulary, summary writer 79 | logger.debug("Preloading training data") 80 | ## make datasets 81 | train_dataset = data.JSONDataset(args.train_file) 82 | if args.dev_file: 83 | dev_dataset = data.JSONDataset(args.dev_file, vocab=train_dataset.vocab) 84 | ## assert they correspond to the task 85 | assert train_dataset.has_gloss, "Training dataset contains no gloss." 86 | if args.source_arch == "electra": 87 | assert train_dataset.has_electra, "Training datatset contains no vector." 88 | else: 89 | assert train_dataset.has_vecs, "Training datatset contains no vector." 90 | if args.dev_file: 91 | assert dev_dataset.has_gloss, "Development dataset contains no gloss." 92 | if args.source_arch == "electra": 93 | assert dev_dataset.has_electra, "Development dataset contains no vector." 94 | else: 95 | assert dev_dataset.has_vecs, "Development dataset contains no vector." 96 | ## make dataloader 97 | train_dataloader = data.get_dataloader(train_dataset) 98 | dev_dataloader = data.get_dataloader(dev_dataset, shuffle=False) 99 | ## make summary writer 100 | summary_writer = SummaryWriter(args.summary_logdir) 101 | train_step = itertools.count() # to keep track of the training steps for logging 102 | 103 | # 2. construct model 104 | logger.debug("Setting up training environment") 105 | 106 | model = models.DefmodModel(dev_dataset.vocab).to(args.device) 107 | model.train() 108 | 109 | # 3. declare optimizer & criterion 110 | ## Hyperparams 111 | EPOCHS, LEARNING_RATE, BETA1, BETA2, WEIGHT_DECAY = 10, 1.0e-4, 0.9, 0.999, 1.0e-6 112 | optimizer = optim.AdamW( 113 | model.parameters(), 114 | lr=LEARNING_RATE, 115 | betas=(BETA1, BETA2), 116 | weight_decay=WEIGHT_DECAY, 117 | ) 118 | criterion = nn.CrossEntropyLoss(ignore_index=model.padding_idx) 119 | 120 | vec_tensor_key = f"{args.source_arch}_tensor" 121 | 122 | # 4. 
train model 123 | for epoch in tqdm.trange(EPOCHS, desc="Epochs"): 124 | ## train loop 125 | pbar = tqdm.tqdm( 126 | desc=f"Train {epoch}", total=len(train_dataset), disable=None, leave=False 127 | ) 128 | for batch in train_dataloader: 129 | optimizer.zero_grad() 130 | vec = batch[vec_tensor_key].to(args.device) 131 | gls = batch["gloss_tensor"].to(args.device) 132 | pred = model(vec, gls[:-1]) 133 | loss = criterion(pred.view(-1, pred.size(-1)), gls.view(-1)) 134 | loss.backward() 135 | # keep track of the train loss for this step 136 | tokens = gls != model.padding_idx 137 | acc = ( 138 | ((pred.argmax(-1) == gls) & tokens).float().sum() / tokens.sum() 139 | ).item() 140 | step = next(train_step) 141 | summary_writer.add_scalar("defmod-train/xent", loss.item(), step) 142 | summary_writer.add_scalar("defmod-train/acc", acc, step) 143 | optimizer.step() 144 | pbar.update(vec.size(0)) 145 | pbar.close() 146 | ## eval loop 147 | if args.dev_file: 148 | model.eval() 149 | with torch.no_grad(): 150 | sum_dev_loss = 0.0 151 | sum_acc = 0 152 | ntoks = 0 153 | pbar = tqdm.tqdm( 154 | desc=f"Eval {epoch}", 155 | total=len(dev_dataset), 156 | disable=None, 157 | leave=False, 158 | ) 159 | for batch in dev_dataloader: 160 | vec = batch[vec_tensor_key].to(args.device) 161 | gls = batch["gloss_tensor"].to(args.device) 162 | pred = model(vec, gls[:-1]) 163 | sum_dev_loss += F.cross_entropy( 164 | pred.view(-1, pred.size(-1)), 165 | gls.view(-1), 166 | reduction="sum", 167 | ignore_index=model.padding_idx, 168 | ).item() 169 | tokens = gls != model.padding_idx 170 | ntoks += tokens.sum().item() 171 | sum_acc += ((pred.argmax(-1) == gls) & tokens).sum().item() 172 | pbar.update(vec.size(0)) 173 | 174 | # keep track of the average loss & acc on dev set for this epoch 175 | summary_writer.add_scalar( 176 | "defmod-dev/xent", sum_dev_loss / ntoks, epoch 177 | ) 178 | summary_writer.add_scalar("defmod-dev/acc", sum_acc / ntoks, epoch) 179 | pbar.close() 180 | model.train() 181 | 182 | # 5. save result 183 | model.save(args.save_dir / "model.pt") 184 | train_dataset.save(args.save_dir / "train_dataset.pt") 185 | dev_dataset.save(args.save_dir / "dev_dataset.pt") 186 | 187 | 188 | def pred(args): 189 | assert args.test_file is not None, "Missing dataset for test" 190 | # 1. retrieve vocab, dataset, model 191 | model = models.DefmodModel.load(args.save_dir / "model.pt") 192 | train_vocab = data.JSONDataset.load(args.save_dir / "train_dataset.pt").vocab 193 | test_dataset = data.JSONDataset( 194 | args.test_file, vocab=train_vocab, freeze_vocab=True, maxlen=model.maxlen 195 | ) 196 | test_dataloader = data.get_dataloader(test_dataset, shuffle=False) 197 | model.eval() 198 | vec_tensor_key = f"{args.source_arch}_tensor" 199 | if args.source_arch == "electra": 200 | assert test_dataset.has_electra, "File is not usable for the task" 201 | else: 202 | assert test_dataset.has_vecs, "File is not usable for the task" 203 | # 2. make predictions 204 | predictions = [] 205 | with torch.no_grad(): 206 | pbar = tqdm.tqdm(desc="Pred.", total=len(test_dataset), disable=None) 207 | for batch in test_dataloader: 208 | sequence = model.pred(batch[vec_tensor_key].to(args.device)) 209 | for id, gloss in zip(batch["id"], test_dataset.decode(sequence)): 210 | predictions.append({"id": id, "gloss": gloss}) 211 | pbar.update(batch[vec_tensor_key].size(0)) 212 | pbar.close() 213 | # 3. 
dump predictions 214 | with open(args.pred_file, "a") as ostr: 215 | json.dump(predictions, ostr) 216 | 217 | 218 | def main(args): 219 | if args.do_train: 220 | logger.debug("Performing defmod training") 221 | train(args) 222 | if args.do_pred: 223 | logger.debug("Performing defmod prediction") 224 | pred(args) 225 | 226 | 227 | if __name__ == "__main__": 228 | args = get_parser().parse_args() 229 | main(args) 230 | -------------------------------------------------------------------------------- /baseline_archs/code/score.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import collections 3 | import itertools 4 | import json 5 | import logging 6 | import os 7 | import pathlib 8 | import sys 9 | 10 | logger = logging.getLogger(pathlib.Path(__file__).name) 11 | logger.setLevel(logging.DEBUG) 12 | handler = logging.StreamHandler(sys.stdout) 13 | handler.setFormatter( 14 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s") 15 | ) 16 | logger.addHandler(handler) 17 | 18 | os.environ["MOVERSCORE_MODEL"] = "distilbert-base-multilingual-cased" 19 | import moverscore_v2 as mv_sc 20 | 21 | from nltk.translate.bleu_score import sentence_bleu as bleu 22 | from nltk import word_tokenize as tokenize 23 | 24 | import numpy as np 25 | 26 | import torch 27 | import torch.nn.functional as F 28 | 29 | import tqdm 30 | 31 | import check_output 32 | 33 | 34 | def get_parser(parser=argparse.ArgumentParser(description="score a submission")): 35 | parser.add_argument( 36 | "submission_path", 37 | type=pathlib.Path, 38 | help="path to submission file to be scored, or to a directory of submissions to be scored", 39 | ) 40 | parser.add_argument( 41 | "--reference_files_dir", 42 | type=pathlib.Path, 43 | help="directory containing all reference files", 44 | default=pathlib.Path("data"), 45 | ) 46 | parser.add_argument( 47 | "--output_file", 48 | type=pathlib.Path, 49 | help="default path to print output", 50 | default=pathlib.Path("scores.txt"), 51 | ) 52 | return parser 53 | 54 | 55 | def mover_corpus_score(sys_stream, ref_streams, trace=0): 56 | """Adapted from the MoverScore github""" 57 | 58 | if isinstance(sys_stream, str): 59 | sys_stream = [sys_stream] 60 | if isinstance(ref_streams, str): 61 | ref_streams = [[ref_streams]] 62 | fhs = [sys_stream] + ref_streams 63 | corpus_score = 0 64 | pbar = tqdm.tqdm(desc="MvSc.", disable=None, total=len(sys_stream)) 65 | for lines in itertools.zip_longest(*fhs): 66 | if None in lines: 67 | raise EOFError("Source and reference streams have different lengths!") 68 | hypo, *refs = lines 69 | idf_dict_hyp = collections.defaultdict(lambda: 1.0) 70 | idf_dict_ref = collections.defaultdict(lambda: 1.0) 71 | corpus_score += mv_sc.word_mover_score( 72 | refs, 73 | [hypo], 74 | idf_dict_ref, 75 | idf_dict_hyp, 76 | stop_words=[], 77 | n_gram=1, 78 | remove_subwords=False, 79 | )[0] 80 | pbar.update() 81 | pbar.close() 82 | corpus_score /= len(sys_stream) 83 | return corpus_score 84 | 85 | 86 | def eval_defmod(args, summary): 87 | # 1. read contents 88 | ## define accumulators for lemma-level BLEU and MoverScore 89 | reference_lemma_groups = collections.defaultdict(list) 90 | all_preds, all_tgts = [], [] 91 | ## reading data files 92 | with open(args.submission_file, "r") as fp: 93 | submission = sorted(json.load(fp), key=lambda r: r["id"]) 94 | with open(args.reference_file, "r") as fp: 95 | reference = sorted(json.load(fp), key=lambda r: r["id"]) 96 | 97 | # 2. 
compute scores 98 | ## compute sense-level BLEU 99 | assert len(submission) == len(reference), "Missing items in submission!" 100 | id_to_lemma = {} 101 | pbar = tqdm.tqdm(total=len(submission), desc="S-BLEU", disable=None) 102 | for sub, ref in zip(submission, reference): 103 | assert sub["id"] == ref["id"], "Mismatch in submission and reference files!" 104 | all_preds.append(sub["gloss"]) 105 | all_tgts.append(ref["gloss"]) 106 | sub["gloss"] = tokenize(sub["gloss"]) 107 | ref["gloss"] = tokenize(ref["gloss"]) 108 | sub["sense-BLEU"] = bleu([sub["gloss"]], ref["gloss"]) 109 | reference_lemma_groups[(ref["word"], ref["pos"])].append(ref["gloss"]) 110 | id_to_lemma[sub["id"]] = (ref["word"], ref["pos"]) 111 | pbar.update() 112 | pbar.close() 113 | ## compute lemma-level BLEU 114 | for sub in tqdm.tqdm(submission, desc="L-BLEU", disable=None): 115 | sub["lemma-BLEU"] = max( 116 | bleu([sub["gloss"]], g) 117 | for g in reference_lemma_groups[id_to_lemma[sub["id"]]] 118 | ) 119 | lemma_bleu_average = sum(s["lemma-BLEU"] for s in submission) / len(submission) 120 | sense_bleu_average = sum(s["sense-BLEU"] for s in submission) / len(submission) 121 | ## compute MoverScore 122 | # moverscore_average = np.mean(mv_sc.word_mover_score( 123 | # all_tgts, 124 | # all_preds, 125 | # collections.defaultdict(lambda:1.), 126 | # collections.defaultdict(lambda:1.), 127 | # stop_words=[], 128 | # n_gram=1, 129 | # remove_subwords=False, 130 | # batch_size=1, 131 | # )) 132 | moverscore_average = mover_corpus_score(all_preds, [all_tgts]) 133 | # 3. write results. 134 | # logger.debug(f"Submission {args.submission_file}, \n\tMvSc.: " + \ 135 | # f"{moverscore_average}\n\tL-BLEU: {lemma_bleu_average}\n\tS-BLEU: " + \ 136 | # f"{sense_bleu_average}" 137 | # ) 138 | with open(args.output_file, "a") as ostr: 139 | print(f"MoverScore_{summary.lang}:{moverscore_average}", file=ostr) 140 | print(f"BLEU_lemma_{summary.lang}:{lemma_bleu_average}", file=ostr) 141 | print(f"BLEU_sense_{summary.lang}:{sense_bleu_average}", file=ostr) 142 | return ( 143 | args.submission_file, 144 | moverscore_average, 145 | lemma_bleu_average, 146 | sense_bleu_average, 147 | ) 148 | 149 | 150 | def rank_cosine(preds, targets): 151 | unique_targets = targets.unique(dim=0) 152 | all_assocs = preds @ F.normalize(targets).T 153 | unique_assocs = preds @ F.normalize(unique_targets).T 154 | refs = torch.diagonal(all_assocs, 0).unsqueeze(1) 155 | ranks = (unique_assocs >= refs).sum(1).float().mean().item() 156 | return ranks / unique_targets.size(0) 157 | 158 | 159 | def eval_revdict(args, summary): 160 | # 1. read contents 161 | ## read data files 162 | with open(args.submission_file, "r") as fp: 163 | submission = sorted(json.load(fp), key=lambda r: r["id"]) 164 | with open(args.reference_file, "r") as fp: 165 | reference = sorted(json.load(fp), key=lambda r: r["id"]) 166 | vec_archs = sorted( 167 | set(submission[0].keys()) 168 | - { 169 | "id", 170 | "gloss", 171 | "word", 172 | "pos", 173 | "concrete", 174 | "example", 175 | "f_rnk", 176 | "counts", 177 | "polysemous", 178 | } 179 | ) 180 | ## define accumulators for rank-cosine 181 | all_preds = collections.defaultdict(list) 182 | all_refs = collections.defaultdict(list) 183 | 184 | assert len(submission) == len(reference), "Missing items in submission!" 185 | ## retrieve vectors 186 | for sub, ref in zip(submission, reference): 187 | assert sub["id"] == ref["id"], "Mismatch in submission and reference files!" 
188 | for arch in vec_archs: 189 | all_preds[arch].append(sub[arch]) 190 | all_refs[arch].append(ref[arch]) 191 | 192 | torch.autograd.set_grad_enabled(False) 193 | all_preds = {arch: torch.tensor(all_preds[arch]) for arch in vec_archs} 194 | all_refs = {arch: torch.tensor(all_refs[arch]) for arch in vec_archs} 195 | 196 | # 2. compute scores 197 | MSE_scores = { 198 | arch: F.mse_loss(all_preds[arch], all_refs[arch]).item() for arch in vec_archs 199 | } 200 | cos_scores = { 201 | arch: F.cosine_similarity(all_preds[arch], all_refs[arch]).mean().item() 202 | for arch in vec_archs 203 | } 204 | rnk_scores = { 205 | arch: rank_cosine(all_preds[arch], all_refs[arch]) for arch in vec_archs 206 | } 207 | # 3. display results 208 | # logger.debug(f"Submission {args.submission_file}, \n\tMSE: " + \ 209 | # ", ".join(f"{a}={MSE_scores[a]}" for a in vec_archs) + \ 210 | # ", \n\tcosine: " + \ 211 | # ", ".join(f"{a}={cos_scores[a]}" for a in vec_archs) + \ 212 | # ", \n\tcosine ranks: " + \ 213 | # ", ".join(f"{a}={rnk_scores[a]}" for a in vec_archs) + \ 214 | # "." 215 | # ) 216 | # all_archs = sorted(set(reference[0].keys()) - {"id", "gloss", "word", "pos"}) 217 | with open(args.output_file, "a") as ostr: 218 | for arch in vec_archs: 219 | print(f"MSE_{summary.lang}_{arch}:{MSE_scores[arch]}", file=ostr) 220 | print(f"cos_{summary.lang}_{arch}:{cos_scores[arch]}", file=ostr) 221 | print(f"rnk_{summary.lang}_{arch}:{rnk_scores[arch]}", file=ostr) 222 | return ( 223 | args.submission_file, 224 | *[MSE_scores.get(a, None) for a in vec_archs], 225 | *[cos_scores.get(a, None) for a in vec_archs], 226 | ) 227 | 228 | 229 | def main(args): 230 | def do_score(submission_file, summary): 231 | args.submission_file = submission_file 232 | args.reference_file = ( 233 | args.reference_files_dir 234 | / f"{summary.lang}.test.{summary.track}.complete.json" 235 | ) 236 | eval_func = eval_revdict if summary.track == "revdict" else eval_defmod 237 | eval_func(args, summary) 238 | 239 | if args.output_file.is_dir(): 240 | args.output_file = args.output_file / "scores.txt" 241 | # wipe file if exists 242 | open(args.output_file, "w").close() 243 | if args.submission_path.is_dir(): 244 | files = list(args.submission_path.glob("*.json")) 245 | assert len(files) >= 1, "No data to score!" 246 | summaries = [check_output.main(f) for f in files] 247 | assert len(set(summaries)) == len(files), "Ensure files map to unique setups." 248 | rd_cfg = [ 249 | (s.lang, a) for s in summaries if s.track == "revdict" for a in s.vec_archs 250 | ] 251 | assert len(set(rd_cfg)) == len(rd_cfg), "Ensure files map to unique setups." 
252 | for summary, submitted_file in zip(summaries, files): 253 | do_score(submitted_file, summary) 254 | else: 255 | summary = check_output.main(args.submission_path) 256 | do_score(args.submission_path, summary) 257 | 258 | 259 | if __name__ == "__main__": 260 | main(get_parser().parse_args()) 261 | -------------------------------------------------------------------------------- /code/score.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import collections 3 | import itertools 4 | import json 5 | import logging 6 | import os 7 | import pathlib 8 | import sys 9 | 10 | logger = logging.getLogger(pathlib.Path(__file__).name) 11 | logger.setLevel(logging.DEBUG) 12 | handler = logging.StreamHandler(sys.stdout) 13 | handler.setFormatter( 14 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s") 15 | ) 16 | logger.addHandler(handler) 17 | 18 | os.environ["MOVERSCORE_MODEL"] = "distilbert-base-multilingual-cased" 19 | import moverscore_v2 as mv_sc 20 | 21 | from nltk.translate.bleu_score import sentence_bleu 22 | from nltk.translate.bleu_score import SmoothingFunction 23 | from nltk import word_tokenize as tokenize 24 | 25 | import numpy as np 26 | 27 | import torch 28 | import torch.nn.functional as F 29 | 30 | import tqdm 31 | 32 | import check_output 33 | 34 | 35 | def get_parser(parser=argparse.ArgumentParser(description="score a submission")): 36 | parser.add_argument( 37 | "submission_path", 38 | type=pathlib.Path, 39 | help="path to submission file to be scored, or to a directory of submissions to be scored", 40 | ) 41 | parser.add_argument( 42 | "--reference_files_dir", 43 | type=pathlib.Path, 44 | help="directory containing all reference files", 45 | default=pathlib.Path("data"), 46 | ) 47 | parser.add_argument( 48 | "--output_file", 49 | type=pathlib.Path, 50 | help="default path to print output", 51 | default=pathlib.Path("scores.txt"), 52 | ) 53 | return parser 54 | 55 | def bleu(pred, target, smoothing_function=SmoothingFunction().method4): 56 | return sentence_bleu([pred], target, smoothing_function=smoothing_function) 57 | 58 | 59 | def mover_corpus_score(sys_stream, ref_streams, trace=0): 60 | """Adapted from the MoverScore github""" 61 | 62 | if isinstance(sys_stream, str): 63 | sys_stream = [sys_stream] 64 | if isinstance(ref_streams, str): 65 | ref_streams = [[ref_streams]] 66 | fhs = [sys_stream] + ref_streams 67 | corpus_score = 0 68 | pbar = tqdm.tqdm(desc="MvSc.", disable=None, total=len(sys_stream)) 69 | for lines in itertools.zip_longest(*fhs): 70 | if None in lines: 71 | raise EOFError("Source and reference streams have different lengths!") 72 | hypo, *refs = lines 73 | idf_dict_hyp = collections.defaultdict(lambda: 1.0) 74 | idf_dict_ref = collections.defaultdict(lambda: 1.0) 75 | corpus_score += mv_sc.word_mover_score( 76 | refs, 77 | [hypo], 78 | idf_dict_ref, 79 | idf_dict_hyp, 80 | stop_words=[], 81 | n_gram=1, 82 | remove_subwords=False, 83 | )[0] 84 | pbar.update() 85 | pbar.close() 86 | corpus_score /= len(sys_stream) 87 | return corpus_score 88 | 89 | 90 | def eval_defmod(args, summary): 91 | # 1. 
read contents 92 | ## define accumulators for lemma-level BLEU and MoverScore 93 | reference_lemma_groups = collections.defaultdict(list) 94 | all_preds, all_tgts = [], [] 95 | ## reading data files 96 | with open(args.submission_file, "r") as fp: 97 | submission = sorted(json.load(fp), key=lambda r: r["id"]) 98 | with open(args.reference_file, "r") as fp: 99 | reference = sorted(json.load(fp), key=lambda r: r["id"]) 100 | 101 | # 2. compute scores 102 | ## compute sense-level BLEU 103 | assert len(submission) == len(reference), "Missing items in submission!" 104 | id_to_lemma = {} 105 | pbar = tqdm.tqdm(total=len(submission), desc="S-BLEU", disable=None) 106 | for sub, ref in zip(submission, reference): 107 | assert sub["id"] == ref["id"], "Mismatch in submission and reference files!" 108 | all_preds.append(sub["gloss"]) 109 | all_tgts.append(ref["gloss"]) 110 | sub["gloss"] = tokenize(sub["gloss"]) 111 | ref["gloss"] = tokenize(ref["gloss"]) 112 | sub["sense-BLEU"] = bleu(sub["gloss"], ref["gloss"]) 113 | reference_lemma_groups[(ref["word"], ref["pos"])].append(ref["gloss"]) 114 | id_to_lemma[sub["id"]] = (ref["word"], ref["pos"]) 115 | pbar.update() 116 | pbar.close() 117 | ## compute lemma-level BLEU 118 | for sub in tqdm.tqdm(submission, desc="L-BLEU", disable=None): 119 | sub["lemma-BLEU"] = max( 120 | bleu(sub["gloss"], g) 121 | for g in reference_lemma_groups[id_to_lemma[sub["id"]]] 122 | ) 123 | lemma_bleu_average = sum(s["lemma-BLEU"] for s in submission) / len(submission) 124 | sense_bleu_average = sum(s["sense-BLEU"] for s in submission) / len(submission) 125 | ## compute MoverScore 126 | # moverscore_average = np.mean(mv_sc.word_mover_score( 127 | # all_tgts, 128 | # all_preds, 129 | # collections.defaultdict(lambda:1.), 130 | # collections.defaultdict(lambda:1.), 131 | # stop_words=[], 132 | # n_gram=1, 133 | # remove_subwords=False, 134 | # batch_size=1, 135 | # )) 136 | moverscore_average = mover_corpus_score(all_preds, [all_tgts]) 137 | # 3. write results. 138 | # logger.debug(f"Submission {args.submission_file}, \n\tMvSc.: " + \ 139 | # f"{moverscore_average}\n\tL-BLEU: {lemma_bleu_average}\n\tS-BLEU: " + \ 140 | # f"{sense_bleu_average}" 141 | # ) 142 | with open(args.output_file, "a") as ostr: 143 | print(f"MoverScore_{summary.lang}:{moverscore_average}", file=ostr) 144 | print(f"BLEU_lemma_{summary.lang}:{lemma_bleu_average}", file=ostr) 145 | print(f"BLEU_sense_{summary.lang}:{sense_bleu_average}", file=ostr) 146 | return ( 147 | args.submission_file, 148 | moverscore_average, 149 | lemma_bleu_average, 150 | sense_bleu_average, 151 | ) 152 | 153 | 154 | def rank_cosine(preds, targets): 155 | assocs = F.normalize(preds) @ F.normalize(targets).T 156 | refs = torch.diagonal(assocs, 0).unsqueeze(1) 157 | ranks = (assocs >= refs).sum(1).float() 158 | assert ranks.numel() == preds.size(0) 159 | ranks = ranks.mean().item() 160 | return ranks / preds.size(0) 161 | 162 | 163 | def eval_revdict(args, summary): 164 | # 1. 
read contents 165 | ## read data files 166 | with open(args.submission_file, "r") as fp: 167 | submission = sorted(json.load(fp), key=lambda r: r["id"]) 168 | with open(args.reference_file, "r") as fp: 169 | reference = sorted(json.load(fp), key=lambda r: r["id"]) 170 | vec_archs = sorted( 171 | set(submission[0].keys()) 172 | - { 173 | "id", 174 | "gloss", 175 | "word", 176 | "pos", 177 | "concrete", 178 | "example", 179 | "f_rnk", 180 | "counts", 181 | "polysemous", 182 | } 183 | ) 184 | ## define accumulators for rank-cosine 185 | all_preds = collections.defaultdict(list) 186 | all_refs = collections.defaultdict(list) 187 | 188 | assert len(submission) == len(reference), "Missing items in submission!" 189 | ## retrieve vectors 190 | for sub, ref in zip(submission, reference): 191 | assert sub["id"] == ref["id"], "Mismatch in submission and reference files!" 192 | for arch in vec_archs: 193 | all_preds[arch].append(sub[arch]) 194 | all_refs[arch].append(ref[arch]) 195 | 196 | torch.autograd.set_grad_enabled(False) 197 | all_preds = {arch: torch.tensor(all_preds[arch]) for arch in vec_archs} 198 | all_refs = {arch: torch.tensor(all_refs[arch]) for arch in vec_archs} 199 | 200 | # 2. compute scores 201 | MSE_scores = { 202 | arch: F.mse_loss(all_preds[arch], all_refs[arch]).item() for arch in vec_archs 203 | } 204 | cos_scores = { 205 | arch: F.cosine_similarity(all_preds[arch], all_refs[arch]).mean().item() 206 | for arch in vec_archs 207 | } 208 | rnk_scores = { 209 | arch: rank_cosine(all_preds[arch], all_refs[arch]) for arch in vec_archs 210 | } 211 | # 3. display results 212 | # logger.debug(f"Submission {args.submission_file}, \n\tMSE: " + \ 213 | # ", ".join(f"{a}={MSE_scores[a]}" for a in vec_archs) + \ 214 | # ", \n\tcosine: " + \ 215 | # ", ".join(f"{a}={cos_scores[a]}" for a in vec_archs) + \ 216 | # ", \n\tcosine ranks: " + \ 217 | # ", ".join(f"{a}={rnk_scores[a]}" for a in vec_archs) + \ 218 | # "." 219 | # ) 220 | # all_archs = sorted(set(reference[0].keys()) - {"id", "gloss", "word", "pos"}) 221 | with open(args.output_file, "a") as ostr: 222 | for arch in vec_archs: 223 | print(f"MSE_{summary.lang}_{arch}:{MSE_scores[arch]}", file=ostr) 224 | print(f"cos_{summary.lang}_{arch}:{cos_scores[arch]}", file=ostr) 225 | print(f"rnk_{summary.lang}_{arch}:{rnk_scores[arch]}", file=ostr) 226 | return ( 227 | args.submission_file, 228 | *[MSE_scores.get(a, None) for a in vec_archs], 229 | *[cos_scores.get(a, None) for a in vec_archs], 230 | ) 231 | 232 | 233 | def main(args): 234 | def do_score(submission_file, summary): 235 | args.submission_file = submission_file 236 | args.reference_file = ( 237 | args.reference_files_dir 238 | / f"{summary.lang}.test.{summary.track}.complete.json" 239 | ) 240 | eval_func = eval_revdict if summary.track == "revdict" else eval_defmod 241 | eval_func(args, summary) 242 | 243 | if args.output_file.is_dir(): 244 | args.output_file = args.output_file / "scores.txt" 245 | # wipe file if exists 246 | open(args.output_file, "w").close() 247 | if args.submission_path.is_dir(): 248 | files = list(args.submission_path.glob("*.json")) 249 | assert len(files) >= 1, "No data to score!" 250 | summaries = [check_output.main(f) for f in files] 251 | assert len(set(summaries)) == len(files), "Ensure files map to unique setups." 252 | rd_cfg = [ 253 | (s.lang, a) for s in summaries if s.track == "revdict" for a in s.vec_archs 254 | ] 255 | assert len(set(rd_cfg)) == len(rd_cfg), "Ensure files map to unique setups." 
256 | for summary, submitted_file in zip(summaries, files): 257 | do_score(submitted_file, summary) 258 | else: 259 | summary = check_output.main(args.submission_path) 260 | do_score(args.submission_path, summary) 261 | 262 | 263 | if __name__ == "__main__": 264 | main(get_parser().parse_args()) 265 | -------------------------------------------------------------------------------- /codalab/competition/competition.yaml: -------------------------------------------------------------------------------- 1 | title: CODWOE - Comparing Dictionaries and Word Embeddings 2 | description: SemEval 2022 Task 1 - Are dictionary glosses and word embedding representations semantically equivalent? Can we generate one from the other? 3 | image: codwoe-logo.png 4 | has_registration: True 5 | allow_teams: True 6 | competition_docker_image: linguistickus/codwoe 7 | html: 8 | overview: overview.html 9 | evaluation: evaluation.html 10 | terms: terms_and_conditions.html 11 | data: data.html 12 | phases: 13 | 1: 14 | phasenumber: 1 15 | label: "Evaluation" 16 | color: blue 17 | start_date: 2022-01-10 18 | max_submissions: 50 19 | scoring_program: scoring_program.zip 20 | reference_data: reference_data.zip 21 | leaderboard_management_mode: hide_results 22 | 2: 23 | phasenumber: 2 24 | label: "Post-Evaluation" 25 | color: purple 26 | start_date: 2022-02-01 27 | phase_never_ends: True 28 | max_submissions: 999 29 | scoring_program: scoring_program.zip 30 | reference_data: reference_data.zip 31 | auto_migration: True 32 | leaderboard_management_mode: default 33 | leaderboard: 34 | leaderboards: 35 | DEFMOD: &DEFMOD 36 | label: Definition Modeling 37 | rank: 1 38 | REVDICT_SGNS: &REVDICT_SGNS 39 | label: Reverse Dictionary (SGNS) 40 | rank: 2 41 | REVDICT_ELECTRA: &REVDICT_ELECTRA 42 | label: Reverse Dictionary (ELECTRA) 43 | rank: 3 44 | REVDICT_CHAR: &REVDICT_CHAR 45 | label: Reverse Dictionary (Character Embeddings) 46 | rank: 4 47 | column_groups: 48 | English: &EN 49 | label: English 50 | Spanish: &ES 51 | label: Spanish 52 | French: &FR 53 | label: French 54 | Italian: &IT 55 | label: Italian 56 | Russian: &RU 57 | label: Russian 58 | columns: 59 | MoverScore_en: 60 | label: MvSc. 61 | leaderboard: *DEFMOD 62 | column_group: *EN 63 | rank: 1 64 | numeric_format: 3 65 | BLEU_sense_en: 66 | label: S-BLEU 67 | leaderboard: *DEFMOD 68 | column_group: *EN 69 | rank: 2 70 | numeric_format: 3 71 | BLEU_lemma_en: 72 | label: L-BLEU 73 | leaderboard: *DEFMOD 74 | column_group: *EN 75 | rank: 3 76 | numeric_format: 3 77 | rank: 2 78 | MoverScore_es: 79 | label: MvSc. 80 | leaderboard: *DEFMOD 81 | column_group: *ES 82 | rank: 1 83 | numeric_format: 3 84 | BLEU_sense_es: 85 | label: S-BLEU 86 | leaderboard: *DEFMOD 87 | column_group: *ES 88 | rank: 2 89 | numeric_format: 3 90 | BLEU_lemma_es: 91 | label: L-BLEU 92 | leaderboard: *DEFMOD 93 | column_group: *ES 94 | rank: 3 95 | numeric_format: 3 96 | MoverScore_fr: 97 | label: MvSc. 98 | leaderboard: *DEFMOD 99 | column_group: *FR 100 | rank: 1 101 | numeric_format: 3 102 | BLEU_sense_fr: 103 | label: S-BLEU 104 | leaderboard: *DEFMOD 105 | column_group: *FR 106 | rank: 2 107 | numeric_format: 3 108 | BLEU_lemma_fr: 109 | label: L-BLEU 110 | leaderboard: *DEFMOD 111 | column_group: *FR 112 | rank: 3 113 | numeric_format: 3 114 | MoverScore_it: 115 | label: MvSc. 
116 | leaderboard: *DEFMOD 117 | column_group: *IT 118 | rank: 1 119 | numeric_format: 3 120 | BLEU_sense_it: 121 | label: S-BLEU 122 | leaderboard: *DEFMOD 123 | column_group: *IT 124 | rank: 2 125 | numeric_format: 3 126 | BLEU_lemma_it: 127 | label: L-BLEU 128 | leaderboard: *DEFMOD 129 | column_group: *IT 130 | rank: 3 131 | numeric_format: 3 132 | MoverScore_ru: 133 | label: MvSc. 134 | leaderboard: *DEFMOD 135 | column_group: *RU 136 | rank: 1 137 | numeric_format: 3 138 | BLEU_sense_ru: 139 | label: S-BLEU 140 | leaderboard: *DEFMOD 141 | column_group: *RU 142 | rank: 2 143 | numeric_format: 3 144 | BLEU_lemma_ru: 145 | label: L-BLEU 146 | leaderboard: *DEFMOD 147 | column_group: *RU 148 | rank: 3 149 | numeric_format: 3 150 | MSE_en_sgns: 151 | label: MSE 152 | leaderboard: *REVDICT_SGNS 153 | column_group: *EN 154 | rank: 1 155 | numeric_format: 3 156 | sort: asc 157 | cos_en_sgns: 158 | label: Cos 159 | leaderboard: *REVDICT_SGNS 160 | column_group: *EN 161 | rank: 2 162 | numeric_format: 3 163 | rnk_en_sgns: 164 | label: Rank 165 | leaderboard: *REVDICT_SGNS 166 | column_group: *EN 167 | rank: 3 168 | numeric_format: 3 169 | sort: asc 170 | MSE_es_sgns: 171 | label: MSE 172 | leaderboard: *REVDICT_SGNS 173 | column_group: *ES 174 | rank: 1 175 | numeric_format: 3 176 | sort: asc 177 | cos_es_sgns: 178 | label: Cos 179 | leaderboard: *REVDICT_SGNS 180 | column_group: *ES 181 | rank: 2 182 | numeric_format: 3 183 | rnk_es_sgns: 184 | label: Rank 185 | leaderboard: *REVDICT_SGNS 186 | column_group: *ES 187 | rank: 3 188 | numeric_format: 3 189 | sort: asc 190 | MSE_fr_sgns: 191 | label: MSE 192 | leaderboard: *REVDICT_SGNS 193 | column_group: *FR 194 | rank: 1 195 | numeric_format: 3 196 | sort: asc 197 | cos_fr_sgns: 198 | label: Cos 199 | leaderboard: *REVDICT_SGNS 200 | column_group: *FR 201 | rank: 2 202 | numeric_format: 3 203 | rnk_fr_sgns: 204 | label: Rank 205 | leaderboard: *REVDICT_SGNS 206 | column_group: *FR 207 | rank: 3 208 | numeric_format: 3 209 | sort: asc 210 | MSE_it_sgns: 211 | label: MSE 212 | leaderboard: *REVDICT_SGNS 213 | column_group: *IT 214 | rank: 1 215 | numeric_format: 3 216 | sort: asc 217 | cos_it_sgns: 218 | label: Cos 219 | leaderboard: *REVDICT_SGNS 220 | column_group: *IT 221 | rank: 2 222 | numeric_format: 3 223 | rnk_it_sgns: 224 | label: Rank 225 | leaderboard: *REVDICT_SGNS 226 | column_group: *IT 227 | rank: 3 228 | numeric_format: 3 229 | sort: asc 230 | MSE_ru_sgns: 231 | label: MSE 232 | leaderboard: *REVDICT_SGNS 233 | column_group: *RU 234 | rank: 1 235 | numeric_format: 3 236 | sort: asc 237 | cos_ru_sgns: 238 | label: Cos 239 | leaderboard: *REVDICT_SGNS 240 | column_group: *RU 241 | rank: 2 242 | numeric_format: 3 243 | rnk_ru_sgns: 244 | label: Rank 245 | leaderboard: *REVDICT_SGNS 246 | column_group: *RU 247 | rank: 3 248 | numeric_format: 3 249 | sort: asc 250 | MSE_en_electra: 251 | label: MSE 252 | leaderboard: *REVDICT_ELECTRA 253 | column_group: *EN 254 | rank: 1 255 | numeric_format: 3 256 | sort: asc 257 | cos_en_electra: 258 | label: Cos 259 | leaderboard: *REVDICT_ELECTRA 260 | column_group: *EN 261 | rank: 2 262 | numeric_format: 3 263 | rnk_en_electra: 264 | label: Rank 265 | leaderboard: *REVDICT_ELECTRA 266 | column_group: *EN 267 | rank: 3 268 | numeric_format: 3 269 | sort: asc 270 | MSE_fr_electra: 271 | label: MSE 272 | leaderboard: *REVDICT_ELECTRA 273 | column_group: *FR 274 | rank: 1 275 | numeric_format: 3 276 | sort: asc 277 | cos_fr_electra: 278 | label: Cos 279 | leaderboard: *REVDICT_ELECTRA 280 | 
column_group: *FR 281 | rank: 2 282 | numeric_format: 3 283 | rnk_fr_electra: 284 | label: Rank 285 | leaderboard: *REVDICT_ELECTRA 286 | column_group: *FR 287 | rank: 3 288 | numeric_format: 3 289 | sort: asc 290 | MSE_ru_electra: 291 | label: MSE 292 | leaderboard: *REVDICT_ELECTRA 293 | column_group: *RU 294 | rank: 1 295 | numeric_format: 3 296 | sort: asc 297 | cos_ru_electra: 298 | label: Cos 299 | leaderboard: *REVDICT_ELECTRA 300 | column_group: *RU 301 | rank: 2 302 | numeric_format: 3 303 | rnk_ru_electra: 304 | label: Rank 305 | leaderboard: *REVDICT_ELECTRA 306 | column_group: *RU 307 | rank: 3 308 | numeric_format: 3 309 | sort: asc 310 | MSE_en_char: 311 | label: MSE 312 | leaderboard: *REVDICT_CHAR 313 | column_group: *EN 314 | rank: 1 315 | numeric_format: 3 316 | sort: asc 317 | cos_en_char: 318 | label: Cos 319 | leaderboard: *REVDICT_CHAR 320 | column_group: *EN 321 | rank: 2 322 | numeric_format: 3 323 | rnk_en_char: 324 | label: Rank 325 | leaderboard: *REVDICT_CHAR 326 | column_group: *EN 327 | rank: 3 328 | numeric_format: 3 329 | sort: asc 330 | MSE_es_char: 331 | label: MSE 332 | leaderboard: *REVDICT_CHAR 333 | column_group: *ES 334 | rank: 1 335 | numeric_format: 3 336 | sort: asc 337 | cos_es_char: 338 | label: Cos 339 | leaderboard: *REVDICT_CHAR 340 | column_group: *ES 341 | rank: 2 342 | numeric_format: 3 343 | rnk_es_char: 344 | label: Rank 345 | leaderboard: *REVDICT_CHAR 346 | column_group: *ES 347 | rank: 3 348 | numeric_format: 3 349 | sort: asc 350 | MSE_fr_char: 351 | label: MSE 352 | leaderboard: *REVDICT_CHAR 353 | column_group: *FR 354 | rank: 1 355 | numeric_format: 3 356 | sort: asc 357 | cos_fr_char: 358 | label: Cos 359 | leaderboard: *REVDICT_CHAR 360 | column_group: *FR 361 | rank: 2 362 | numeric_format: 3 363 | rnk_fr_char: 364 | label: Rank 365 | leaderboard: *REVDICT_CHAR 366 | column_group: *FR 367 | rank: 3 368 | numeric_format: 3 369 | sort: asc 370 | MSE_it_char: 371 | label: MSE 372 | leaderboard: *REVDICT_CHAR 373 | column_group: *IT 374 | rank: 1 375 | numeric_format: 3 376 | sort: asc 377 | cos_it_char: 378 | label: Cos 379 | leaderboard: *REVDICT_CHAR 380 | column_group: *IT 381 | rank: 2 382 | numeric_format: 3 383 | rnk_it_char: 384 | label: Rank 385 | leaderboard: *REVDICT_CHAR 386 | column_group: *IT 387 | rank: 3 388 | numeric_format: 3 389 | sort: asc 390 | MSE_ru_char: 391 | label: MSE 392 | leaderboard: *REVDICT_CHAR 393 | column_group: *RU 394 | rank: 1 395 | numeric_format: 3 396 | sort: asc 397 | cos_ru_char: 398 | label: Cos 399 | leaderboard: *REVDICT_CHAR 400 | column_group: *RU 401 | rank: 2 402 | numeric_format: 3 403 | rnk_ru_char: 404 | label: Rank 405 | leaderboard: *REVDICT_CHAR 406 | column_group: *RU 407 | rank: 3 408 | numeric_format: 3 409 | sort: asc 410 | -------------------------------------------------------------------------------- /baseline_archs/code/data.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import count 3 | import json 4 | import random 5 | import tempfile 6 | 7 | import torch 8 | from torch.nn.utils.rnn import pad_sequence 9 | from torch.utils.data import DataLoader, Dataset, Sampler 10 | 11 | import sentencepiece as spm 12 | # special-token strings (placeholder values restored here; any four distinct markers work) 13 | BOS = "<bos>" 14 | EOS = "<eos>" 15 | PAD = "<pad>" 16 | UNK = "<unk>" 17 | 18 | SUPPORTED_ARCHS = ("sgns", "char") 19 | 20 | # A dataset is a container object for the actual data 21 | class JSONDataset(Dataset): 22 | """Reads a CODWOE JSON dataset""" 23 | 24 | def __init__( 25 | self,
26 | file, 27 | vocab=None, 28 | freeze_vocab=False, 29 | maxlen=256, 30 | spm_model_name=None, 31 | train_spm=False, 32 | ): 33 | """ 34 | Construct a torch.utils.data.Dataset compatible with torch data API and 35 | codwoe data. 36 | args: `file` the path to the dataset file 37 | `vocab` a dictionary mapping strings to indices 38 | `freeze_vocab` whether to update vocabulary, or just replace unknown items with OOV token 39 | `maxlen` the maximum number of tokens per gloss 40 | `spm_model_name` create and use this sentencepiece model instead of whitespace tokenization 41 | """ 42 | self.use_spm = spm_model_name is not None 43 | if vocab is None: 44 | self.vocab = defaultdict(count().__next__) 45 | else: 46 | self.vocab = defaultdict(count(len(vocab)).__next__) 47 | self.vocab.update(vocab) 48 | pad, eos, bos, unk = ( 49 | self.vocab[PAD], 50 | self.vocab[EOS], 51 | self.vocab[BOS], 52 | self.vocab[UNK], 53 | ) 54 | if freeze_vocab: 55 | self.vocab = dict(vocab) 56 | with open(file, "r") as istr: 57 | self.items = json.load(istr) 58 | if self.use_spm: 59 | if train_spm: 60 | with tempfile.NamedTemporaryFile(mode="w+") as temp_fp: 61 | for gls in (j["gloss"] for j in self.items): 62 | print(gls, file=temp_fp) 63 | temp_fp.seek(0) 64 | spm.SentencePieceTrainer.train( 65 | input=temp_fp.name, 66 | model_prefix=spm_model_name, 67 | vocab_size=15000, 68 | pad_id=pad, 69 | pad_piece=PAD, 70 | eos_id=eos, 71 | eos_piece=EOS, 72 | bos_id=bos, 73 | bos_piece=BOS, 74 | unk_id=unk, 75 | unk_piece=UNK, 76 | ) 77 | self.spm_model = spm.SentencePieceProcessor( 78 | model_file=f"{spm_model_name}.model" 79 | ) 80 | # preparse data 81 | for json_dict in self.items: 82 | # in definition modeling test datasets, gloss targets are absent 83 | if "gloss" in json_dict: 84 | if spm_model_name: 85 | json_dict["gloss_tensor"] = torch.tensor( 86 | self.spm_model.encode( 87 | json_dict["gloss"], add_eos=True, add_bos=True 88 | ) 89 | ) 90 | else: 91 | json_dict["gloss_tensor"] = torch.tensor( 92 | [bos] 93 | + [ 94 | self.vocab[word] 95 | if not freeze_vocab 96 | else self.vocab.get(word, unk) 97 | for word in json_dict["gloss"].split() 98 | ] 99 | + [eos] 100 | ) 101 | if maxlen: 102 | json_dict["gloss_tensor"] = json_dict["gloss_tensor"][:maxlen] 103 | # in reverse dictionary test datasets, vector targets are absent 104 | for arch in SUPPORTED_ARCHS: 105 | if arch in json_dict: 106 | json_dict[f"{arch}_tensor"] = torch.tensor(json_dict[arch]) 107 | if "electra" in json_dict: 108 | json_dict["electra_tensor"] = torch.tensor(json_dict["electra"]) 109 | if self.use_spm: 110 | self.vocab = { 111 | self.spm_model.id_to_piece(idx): idx 112 | for idx in range(self.spm_model.get_piece_size()) 113 | } 114 | 115 | self.has_gloss = "gloss" in self.items[0] 116 | self.has_vecs = SUPPORTED_ARCHS[0] in self.items[0] 117 | self.has_electra = "electra" in self.items[0] 118 | self.itos = sorted(self.vocab, key=lambda w: self.vocab[w]) 119 | 120 | def __len__(self): 121 | return len(self.items) 122 | 123 | def __getitem__(self, index): 124 | return self.items[index] 125 | 126 | # we're adding this method to simplify the code in our predictions of 127 | # glosses 128 | @torch.no_grad() 129 | def decode(self, tensor): 130 | """Convert a sequence of indices (possibly batched) to tokens""" 131 | if tensor.dim() == 2: 132 | # we have batched tensors of shape [Seq x Batch] 133 | decoded = [] 134 | for tensor_ in tensor.t(): 135 | decoded.append(self.decode(tensor_)) 136 | return decoded 137 | else: 138 | ids = [i.item() for i in tensor if 
i != self.vocab[PAD]] 139 | if self.itos[ids[0]] == BOS: ids = ids[1:] 140 | if self.itos[ids[-1]] == EOS: ids = ids[:-1] 141 | if self.use_spm: 142 | return self.spm_model.decode(ids) 143 | return " ".join(self.itos[i] for i in ids) 144 | 145 | def save(self, file): 146 | torch.save(self, file) 147 | 148 | @staticmethod 149 | def load(file): 150 | return torch.load(file) 151 | 152 | 153 | # A sampler allows you to define how to select items from your Dataset. Torch 154 | # provides a number of default Sampler classes 155 | class TokenSampler(Sampler): 156 | """Produce batches with up to `batch_size` tokens in each batch""" 157 | 158 | def __init__( 159 | self, dataset, batch_size=150, size_fn=len, drop_last=False, shuffle=True 160 | ): 161 | """ 162 | args: `dataset` a torch.utils.data.Dataset (iterable style) 163 | `batch_size` the maximum number of tokens in a batch 164 | `size_fn` a callable that yields the number of tokens in a dataset item 165 | `drop_last` if True and the data can't be divided in exactly the right number of batches, drop the last batch 166 | `shuffle` if True, shuffle between every iteration 167 | """ 168 | self.dataset = dataset 169 | self.batch_size = batch_size 170 | self.size_fn = size_fn 171 | self._len = None 172 | self.drop_last = drop_last 173 | self.shuffle = shuffle 174 | 175 | def __iter__(self): 176 | indices = range(len(self.dataset)) 177 | if self.shuffle: 178 | indices = list(indices) 179 | random.shuffle(indices) 180 | i = 0 181 | selected = [] 182 | numel = 0 183 | longest_len = 0 184 | for i in indices: 185 | if numel + self.size_fn(self.dataset[i]) > self.batch_size: 186 | if selected: 187 | yield selected 188 | selected = [] 189 | numel = 0 190 | numel += self.size_fn(self.dataset[i]) 191 | selected.append(i) 192 | if selected and not self.drop_last: 193 | yield selected 194 | 195 | def __len__(self): 196 | if self._len is None: 197 | self._len = round( 198 | sum(self.size_fn(self.dataset[i]) for i in range(len(self.dataset))) 199 | / self.batch_size 200 | ) 201 | return self._len 202 | 203 | 204 | # DataLoaders give access to an iterator over the dataset, using a sampling 205 | # strategy as defined through a Sampler. 206 | def get_dataloader(dataset, batch_size=200, shuffle=True): 207 | """produce dataloader. 208 | args: `dataset` a torch.utils.data.Dataset (iterable style) 209 | `batch_size` the maximum number of tokens in a batch 210 | `shuffle` if True, shuffle between every iteration 211 | """ 212 | # some constants for the closures 213 | has_gloss = dataset.has_gloss 214 | has_vecs = dataset.has_vecs 215 | has_electra = dataset.has_electra 216 | PAD_idx = dataset.vocab[PAD] 217 | 218 | # the collate function has to convert a list of dataset items into a batch 219 | def do_collate(json_dicts): 220 | """collates examples into a dict batch; produces and pads tensors""" 221 | batch = defaultdict(list) 222 | for jdict in json_dicts: 223 | for key in jdict: 224 | batch[key].append(jdict[key]) 225 | if has_gloss: 226 | batch["gloss_tensor"] = pad_sequence( 227 | batch["gloss_tensor"], padding_value=PAD_idx, batch_first=False 228 | ) 229 | if has_vecs: 230 | for arch in SUPPORTED_ARCHS: 231 | batch[f"{arch}_tensor"] = torch.stack(batch[f"{arch}_tensor"]) 232 | if has_electra: 233 | batch["electra_tensor"] = torch.stack(batch["electra_tensor"]) 234 | return dict(batch) 235 | 236 | if dataset.has_gloss: 237 | # we try to keep the amount of gloss tokens roughly constant across all 238 | # batches.
239 | def do_size_item(item): 240 | """retrieve tensor size, so as to batch items per element count""" 241 | return item["gloss_tensor"].numel() 242 | 243 | return DataLoader( 244 | dataset, 245 | collate_fn=do_collate, 246 | batch_sampler=TokenSampler( 247 | dataset, batch_size=batch_size, size_fn=do_size_item, shuffle=shuffle 248 | ), 249 | ) 250 | else: 251 | # there's no gloss, hence no gloss tokens, so we use a default batching 252 | # strategy. 253 | return DataLoader( 254 | dataset, collate_fn=do_collate, batch_size=batch_size, shuffle=shuffle 255 | ) 256 | 257 | 258 | def get_train_dataset(train_file, spm_model_path, save_dir): 259 | if (save_dir / "train_dataset.pt").is_file(): 260 | dataset = JSONDataset.load(save_dir / "train_dataset.pt") 261 | else: 262 | dataset = JSONDataset( 263 | train_file, 264 | spm_model_name=spm_model_path.with_suffix(""), 265 | train_spm=not spm_model_path.with_suffix(".model").is_file(), 266 | ) 267 | dataset.save(save_dir / "train_dataset.pt") 268 | return dataset 269 | 270 | 271 | def get_dev_dataset(dev_file, spm_model_path, save_dir, train_dataset=None): 272 | if (save_dir / "dev_dataset.pt").is_file(): 273 | dataset = JSONDataset.load(save_dir / "dev_dataset.pt") 274 | else: 275 | dataset = JSONDataset( 276 | dev_file, spm_model_name=spm_model_path, train_spm=False 277 | ) 278 | dataset.save(save_dir / "dev_dataset.pt") 279 | return dataset 280 | -------------------------------------------------------------------------------- /baseline_archs/code/revdict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import itertools 3 | import json 4 | import logging 5 | import pathlib 6 | import pprint 7 | import secrets 8 | 9 | import skopt 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torch.optim as optim 15 | from torch.utils.tensorboard import SummaryWriter 16 | 17 | import tqdm 18 | 19 | import data 20 | import models 21 | 22 | logger = logging.getLogger(pathlib.Path(__file__).name) 23 | logger.setLevel(logging.DEBUG) 24 | handler = logging.StreamHandler(tqdm.tqdm) 25 | handler.terminator = "" 26 | handler.setFormatter( 27 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s") 28 | ) 29 | logger.addHandler(handler) 30 | 31 | 32 | def get_parser( 33 | parser=argparse.ArgumentParser( 34 | description="Run a reverse dictionary baseline.\nThe task consists in reconstructing an embedding from the glosses listed in the datasets" 35 | ), 36 | ): 37 | parser.add_argument( 38 | "--do_htune", 39 | action="store_true", 40 | help="whether to perform hyperparameter tuning", 41 | ) 42 | parser.add_argument( 43 | "--do_train", action="store_true", help="whether to train a model from scratch" 44 | ) 45 | parser.add_argument( 46 | "--do_pred", action="store_true", help="whether to produce predictions" 47 | ) 48 | parser.add_argument( 49 | "--train_file", type=pathlib.Path, help="path to the train file" 50 | ) 51 | parser.add_argument("--dev_file", type=pathlib.Path, help="path to the dev file") 52 | parser.add_argument("--test_file", type=pathlib.Path, help="path to the test file") 53 | parser.add_argument( 54 | "--device", 55 | type=torch.device, 56 | default=torch.device("cpu"), 57 | help="device on which to run computations", 58 | ) 59 | parser.add_argument( 60 | "--target_arch", 61 | type=str, 62 | default="sgns", 63 | choices=("sgns", "char", "electra"), 64 | help="embedding architecture to use as target", 65 | ) 66 | parser.add_argument( 67 |
"--summary_logdir", 68 | type=pathlib.Path, 69 | default=pathlib.Path("logs") / f"revdict-baseline", 70 | help="write logs for future analysis", 71 | ) 72 | parser.add_argument( 73 | "--save_dir", 74 | type=pathlib.Path, 75 | default=pathlib.Path("models") / f"revdict-baseline", 76 | help="where to save model & vocab", 77 | ) 78 | parser.add_argument( 79 | "--spm_model_path", 80 | type=pathlib.Path, 81 | default=None, 82 | help="use sentencepiece model, if required train and save it here", 83 | ) 84 | parser.add_argument( 85 | "--pred_file", 86 | type=pathlib.Path, 87 | default=pathlib.Path("revdict-baseline-preds.json"), 88 | help="where to save predictions", 89 | ) 90 | return parser 91 | 92 | 93 | def get_search_space(): 94 | """get hyperparmeters to optimize for""" 95 | search_space = [ 96 | skopt.space.Real(1e-8, 1.0, "log-uniform", name="learning_rate"), 97 | skopt.space.Real(0.0, 1.0, "uniform", name="weight_decay"), 98 | skopt.space.Real(0.9, 1.0 - 1e-8, "log-uniform", name="beta_a"), 99 | skopt.space.Real(0.9, 1.0 - 1e-8, "log-uniform", name="beta_b"), 100 | skopt.space.Real(0.0, 0.9, "uniform", name="dropout"), 101 | skopt.space.Real(0.0, 1.0, "uniform", name="warmup_len"), 102 | skopt.space.Integer(1, 100, "log-uniform", name="batch_accum"), 103 | skopt.space.Integer(0, 5, "uniform", name="n_head_pow"), 104 | skopt.space.Integer(1, 6, "uniform", name="n_layers"), 105 | ] 106 | return search_space 107 | 108 | 109 | def train( 110 | train_file, 111 | dev_file, 112 | target_arch="sgns", 113 | summary_logdir=pathlib.Path("logs") / "revdict-htune", 114 | save_dir=pathlib.Path("models") / "revdict-baseline", 115 | device="cuda:0", 116 | spm_model_path=None, 117 | epochs=100, 118 | learning_rate=1e-4, 119 | beta1=0.9, 120 | beta2=0.999, 121 | weight_decay=1e-6, 122 | patience=5, 123 | batch_accum=1, 124 | dropout=0.3, 125 | warmup_len=0.1, 126 | n_head=4, 127 | n_layers=4, 128 | ): 129 | assert train_file is not None, "Missing dataset for training" 130 | assert dev_file is not None, "Missing dataset for development" 131 | # 1. get data, vocabulary, summary writer 132 | logger.debug("Preloading data") 133 | save_dir = save_dir / target_arch 134 | save_dir.mkdir(parents=True, exist_ok=True) 135 | ## make datasets 136 | train_dataset = data.get_train_dataset(train_file, spm_model_path, save_dir) 137 | dev_dataset = data.get_dev_dataset( 138 | dev_file, spm_model_path, save_dir, train_dataset 139 | ) 140 | 141 | ## assert they correspond to the task 142 | assert train_dataset.has_gloss, "Training dataset contains no gloss." 143 | if target_arch == "electra": 144 | assert train_dataset.has_electra, "Training datatset contains no vector." 145 | else: 146 | assert train_dataset.has_vecs, "Training datatset contains no vector." 147 | assert dev_dataset.has_gloss, "Development dataset contains no gloss." 148 | if target_arch == "electra": 149 | assert dev_dataset.has_electra, "Development dataset contains no vector." 150 | else: 151 | assert dev_dataset.has_vecs, "Development dataset contains no vector." 152 | ## make dataloader 153 | train_dataloader = data.get_dataloader(train_dataset) 154 | dev_dataloader = data.get_dataloader(dev_dataset, shuffle=False) 155 | ## make summary writer 156 | summary_writer = SummaryWriter(summary_logdir) 157 | train_step = itertools.count() # to keep track of the training steps for logging 158 | 159 | # 2. 
construct model 160 | ## Hyperparams 161 | logger.debug("Setting up training environment") 162 | model = models.RevdictModel( 163 | dev_dataset.vocab, n_head=n_head, n_layers=n_layers, dropout=dropout 164 | ) 165 | model = model.to(device) 166 | model.train() 167 | 168 | # 3. declare optimizer & criterion 169 | ## Hyperparams 170 | optimizer = optim.AdamW( 171 | model.parameters(), 172 | lr=learning_rate, 173 | betas=(beta1, beta2), 174 | weight_decay=weight_decay, 175 | ) 176 | criterion = nn.MSELoss() 177 | 178 | vec_tensor_key = f"{target_arch}_tensor" 179 | best_mse = float("inf") 180 | strikes = 0 181 | 182 | # 4. train model 183 | epochs_range = tqdm.trange(epochs, desc="Epochs") 184 | total_steps = (len(train_dataloader) * epochs) // batch_accum 185 | scheduler = models.get_schedule( 186 | optimizer, round(total_steps * warmup_len), total_steps 187 | ) 188 | 189 | # 4. train model 190 | for epoch in epochs_range: 191 | ## train loop 192 | pbar = tqdm.tqdm( 193 | desc=f"Train {epoch}", total=len(train_dataset), disable=None, leave=False 194 | ) 195 | optimizer.zero_grad() 196 | for i, batch in enumerate(train_dataloader): 197 | optimizer.zero_grad() 198 | gls = batch["gloss_tensor"].to(device) 199 | vec = batch[vec_tensor_key].to(device) 200 | pred = model(gls) 201 | loss = criterion(pred, vec) 202 | loss.backward() 203 | grad_remains = True 204 | step = next(train_step) 205 | if i % batch_accum == 0: 206 | optimizer.step() 207 | scheduler.step() 208 | optimizer.zero_grad() 209 | grad_remains = False 210 | summary_writer.add_scalar( 211 | "revdict-train/lr", scheduler.get_last_lr()[0], step 212 | ) 213 | # keep track of the train loss for this step 214 | with torch.no_grad(): 215 | cos_sim = F.cosine_similarity(pred, vec).mean().item() 216 | summary_writer.add_scalar("revdict-train/cos", cos_sim, step) 217 | summary_writer.add_scalar("revdict-train/mse", loss.item(), step) 218 | pbar.update(vec.size(0)) 219 | if grad_remains: 220 | optimizer.step() 221 | scheduler.step() 222 | optimizer.zero_grad() 223 | pbar.close() 224 | ## eval loop 225 | model.eval() 226 | with torch.no_grad(): 227 | sum_dev_loss = 0.0 228 | sum_cosine = 0.0 229 | pbar = tqdm.tqdm( 230 | desc=f"Eval {epoch}", 231 | total=len(dev_dataset), 232 | disable=None, 233 | leave=False, 234 | ) 235 | for batch in dev_dataloader: 236 | gls = batch["gloss_tensor"].to(device) 237 | vec = batch[vec_tensor_key].to(device) 238 | pred = model(gls) 239 | sum_dev_loss += ( 240 | F.mse_loss(pred, vec, reduction="none").mean(1).sum().item() 241 | ) 242 | sum_cosine += F.cosine_similarity(pred, vec).sum().item() 243 | pbar.update(vec.size(0)) 244 | # keep track of the average loss on dev set for this epoch 245 | new_mse = sum_dev_loss / len(dev_dataset) 246 | summary_writer.add_scalar( 247 | "revdict-dev/cos", sum_cosine / len(dev_dataset), epoch 248 | ) 249 | summary_writer.add_scalar("revdict-dev/mse", new_mse, epoch) 250 | pbar.close() 251 | if new_mse < (best_mse * 0.999): 252 | logger.debug( 253 | f"Epoch {epoch}, new best loss: {new_mse:.4f} < {best_mse:.4f}" 254 | + f" (x 0.999 = {best_mse * 0.999:.4f})" 255 | ) 256 | best_mse = new_mse 257 | strikes = 0 258 | else: 259 | strikes += 1 260 | # check result if better 261 | if not (save_dir / "best_scores.txt").is_file(): 262 | overall_best_mse = float("inf") 263 | else: 264 | with open(save_dir / "best_scores.txt", "r") as score_file: 265 | overall_best_mse = float(score_file.read()) 266 | # save result if better 267 | if new_mse < overall_best_mse: 268 | logger.debug( 269 | 
f"Epoch {epoch}, new overall best loss: {new_mse:.4f} < {overall_best_mse:.4f}" 270 | ) 271 | model.save(save_dir / "model.pt") 272 | with open(save_dir / "hparams.json", "w") as json_file: 273 | hparams = { 274 | "learning_rate": learning_rate, 275 | "beta1": beta1, 276 | "beta2": beta2, 277 | "weight_decay": weight_decay, 278 | } 279 | json.dump(hparams, json_file, indent=2) 280 | with open(save_dir / "best_scores.txt", "w") as score_file: 281 | print(new_mse, file=score_file) 282 | if strikes >= patience: 283 | logger.debug("Stopping early.") 284 | epochs_range.close() 285 | break 286 | model.train() 287 | # return loss for gp minimize 288 | return best_mse 289 | 290 | 291 | def pred(args): 292 | assert args.test_file is not None, "Missing dataset for test" 293 | # 1. retrieve vocab, dataset, model 294 | model = models.DefmodModel.load(args.save_dir / "model.pt") 295 | train_vocab = data.JSONDataset.load(args.save_dir / "train_dataset.pt").vocab 296 | test_dataset = data.JSONDataset( 297 | args.test_file, vocab=train_vocab, freeze_vocab=True, maxlen=model.maxlen 298 | ) 299 | test_dataloader = data.get_dataloader(test_dataset, shuffle=False, batch_size=1024) 300 | model.eval() 301 | vec_tensor_key = f"{args.target_arch}_tensor" 302 | assert test_dataset.has_gloss, "File is not usable for the task" 303 | # 2. make predictions 304 | predictions = [] 305 | with torch.no_grad(): 306 | pbar = tqdm.tqdm(desc="Pred.", total=len(test_dataset)) 307 | for batch in test_dataloader: 308 | vecs = model(batch["gloss_tensor"].to(args.device)).cpu() 309 | for id, vec in zip(batch["id"], vecs.unbind()): 310 | predictions.append( 311 | {"id": id, args.target_arch: vec.view(-1).cpu().tolist()} 312 | ) 313 | pbar.update(vecs.size(0)) 314 | pbar.close() 315 | with open(args.pred_file, "w") as ostr: 316 | json.dump(predictions, ostr) 317 | 318 | 319 | def main(args): 320 | assert not (args.do_train and args.do_htune), "Conflicting options" 321 | 322 | if args.do_train: 323 | logger.debug("Performing revdict training") 324 | train( 325 | args.train_file, 326 | args.dev_file, 327 | args.target_arch, 328 | args.summary_logdir, 329 | args.save_dir, 330 | args.device, 331 | ) 332 | elif args.do_htune: 333 | logger.debug("Performing revdict hyperparameter tuning") 334 | search_space = get_search_space() 335 | 336 | @skopt.utils.use_named_args(search_space) 337 | def gp_train(**hparams): 338 | logger.debug(f"Hyperparams sampled:\n{pprint.pformat(hparams)}") 339 | best_loss = train( 340 | train_file=args.train_file, 341 | dev_file=args.dev_file, 342 | target_arch=args.target_arch, 343 | summary_logdir=args.summary_logdir 344 | / args.target_arch 345 | / secrets.token_urlsafe(8), 346 | save_dir=args.save_dir, 347 | device=args.device, 348 | spm_model_path=args.spm_model_path, 349 | learning_rate=hparams["learning_rate"], 350 | beta1=min(hparams["beta_a"], hparams["beta_b"]), 351 | beta2=max(hparams["beta_a"], hparams["beta_b"]), 352 | weight_decay=hparams["weight_decay"], 353 | batch_accum=hparams["batch_accum"], 354 | warmup_len=hparams["warmup_len"], 355 | n_head=2 ** hparams["n_head_pow"], 356 | n_layers=hparams["n_layers"], 357 | ) 358 | return best_loss 359 | 360 | result = skopt.gp_minimize(gp_train, search_space) 361 | args.save_dir = args.save_dir / args.target_arch 362 | skopt.dump(result, args.save_dir / "results.pkl", store_objective=False) 363 | 364 | if args.do_pred: 365 | logger.debug("Performing revdict prediction") 366 | pred(args) 367 | 368 | 369 | if __name__ == "__main__": 370 | args = 
get_parser().parse_args() 371 | main(args) 372 | -------------------------------------------------------------------------------- /baseline_archs/code/models.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | import data 8 | 9 | 10 | def get_schedule( 11 | optimizer, num_warmup_steps, num_training_steps, num_cycles=0.5, last_epoch=-1 12 | ): 13 | """From Huggingface""" 14 | 15 | def lr_lambda(current_step): 16 | if current_step < num_warmup_steps: 17 | return float(current_step) / float(max(1, num_warmup_steps)) 18 | progress = float(current_step - num_warmup_steps) / float( 19 | max(1, num_training_steps - num_warmup_steps) 20 | ) 21 | return max( 22 | 0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)) 23 | ) 24 | 25 | return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch) 26 | 27 | 28 | class PositionalEncoding(nn.Module): 29 | """From PyTorch""" 30 | 31 | def __init__(self, d_model, dropout=0.1, max_len=4096): 32 | super(PositionalEncoding, self).__init__() 33 | self.dropout = nn.Dropout(p=dropout) 34 | pe = torch.zeros(max_len, d_model) 35 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 36 | div_term = torch.exp( 37 | torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model) 38 | ) 39 | pe[:, 0::2] = torch.sin(position * div_term) 40 | pe[:, 1::2] = torch.cos(position * div_term) 41 | pe = pe.unsqueeze(0).transpose(0, 1) 42 | self.register_buffer("pe", pe) 43 | 44 | def forward(self, x): 45 | x = x + self.pe[: x.size(0)] 46 | return self.dropout(x) 47 | 48 | 49 | class DefmodModel(nn.Module): 50 | """A transformer architecture for Definition Modeling.""" 51 | 52 | def __init__( 53 | self, vocab, d_model=256, n_head=4, n_layers=4, dropout=0.3, maxlen=256 54 | ): 55 | super(DefmodModel, self).__init__() 56 | self.d_model = d_model 57 | self.padding_idx = vocab[data.PAD] 58 | self.eos_idx = vocab[data.EOS] 59 | self.maxlen = maxlen 60 | 61 | self.embedding = nn.Embedding(len(vocab), d_model, padding_idx=self.padding_idx) 62 | self.positional_encoding = PositionalEncoding( 63 | d_model, dropout=dropout, max_len=maxlen 64 | ) 65 | encoder_layer = nn.TransformerEncoderLayer( 66 | d_model=d_model, nhead=n_head, dropout=dropout, dim_feedforward=d_model * 2 67 | ) 68 | self.transformer_encoder = nn.TransformerEncoder( 69 | encoder_layer, num_layers=n_layers 70 | ) 71 | self.v_proj = nn.Linear(d_model, len(vocab)) 72 | # initializing weights 73 | for name, param in self.named_parameters(): 74 | if param.dim() > 1: 75 | nn.init.xavier_uniform_(param) 76 | elif "bias" in name: 77 | nn.init.zeros_(param) 78 | else: # gain parameters of the layer norm 79 | nn.init.ones_(param) 80 | 81 | def generate_square_subsequent_mask(self, sz): 82 | "from Pytorch" 83 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) 84 | mask = ( 85 | mask.float() 86 | .masked_fill(mask == 0, float("-inf")) 87 | .masked_fill(mask == 1, float(0.0)) 88 | ) 89 | return mask 90 | 91 | def forward(self, vector, input_sequence=None): 92 | device = next(self.parameters()).device 93 | embs = self.embedding(input_sequence) 94 | seq = torch.cat([vector.unsqueeze(0), embs], dim=0) 95 | src = self.positional_encoding(seq) 96 | src_mask = self.generate_square_subsequent_mask(src.size(0)).to(device) 97 | src_key_padding_mask = torch.cat( 98 | [ 99 | torch.tensor([[False] * input_sequence.size(1)]).to(device), 100 | 
(input_sequence == self.padding_idx), 101 | ], 102 | dim=0, 103 | ).t() 104 | transformer_output = self.transformer_encoder( 105 | src, mask=src_mask, src_key_padding_mask=src_key_padding_mask 106 | ) 107 | v_dist = self.v_proj(transformer_output) 108 | return v_dist 109 | 110 | @staticmethod 111 | def load(file): 112 | return torch.load(file) 113 | 114 | def save(self, file): 115 | file.parent.mkdir(exist_ok=True, parents=True) 116 | torch.save(self, file) 117 | 118 | @torch.no_grad() 119 | def pred(self, vector, decode_fn=None, beam_size=64, verbose=False): 120 | # which device we should cast our variables to 121 | device = next(self.parameters()).device 122 | 123 | # how many examples are batched together 124 | batch_size = vector.size(0) 125 | 126 | # Tensors will have this shape: 127 | # [Sequence, Batch, Beam, Continuation, *] 128 | 129 | # accumulation variable, keeping track of the best beams for each batched example 130 | generated_symbols = torch.zeros(0, batch_size, beam_size, dtype=torch.long).to(device) 131 | 132 | # which beams hold a completed sequence 133 | current_beam_size = 1 134 | has_stopped = torch.tensor([False] * (batch_size * current_beam_size)).to(device) 135 | 136 | # the input to kick-start the generation is the embedding, we start with the same input for each beam 137 | vector_src = vector.unsqueeze(1).expand(batch_size, current_beam_size, -1).reshape(1, batch_size * current_beam_size, -1) 138 | src = vector_src 139 | src_key_padding_mask = torch.tensor([[False] * (batch_size * current_beam_size)]).to(device) 140 | 141 | # variables needed to compute the score of each beam (geometric mean of probability of emission) 142 | logprobs = torch.zeros(batch_size, current_beam_size, dtype=torch.double).to(device) 143 | lengths = torch.zeros(batch_size * current_beam_size, dtype=torch.int).to(device) 144 | # generate tokens step by step 145 | for step_idx in range(self.maxlen): 146 | 147 | # generation mask 148 | src_mask = self.generate_square_subsequent_mask(src.size(0)).to(device) 149 | # positional encoding 150 | src_pe = self.positional_encoding(src) 151 | # transformer output 152 | transformer_output = self.transformer_encoder( 153 | src_pe, mask=src_mask, src_key_padding_mask=src_key_padding_mask.t() 154 | )[-1] 155 | # distribution over the full vocabulary 156 | v_dist = self.v_proj(transformer_output) 157 | # don't generate padding tokens 158 | v_dist[...,self.padding_idx] = -float("inf") 159 | v_dist = F.log_softmax(v_dist, dim=-1) 160 | 161 | # for each beam, select the best candidate continuations 162 | new_logprobs, new_symbols = v_dist.topk(beam_size, dim=-1) 163 | # patch the output scores to zero-out items that have already stopped 164 | new_logprobs = new_logprobs.masked_fill(has_stopped.unsqueeze(-1), 0.0) 165 | # if the beam hasn't stopped, then it needs to produce at least an EOS 166 | # so we can just add one to beams that have not stopped to account for the current token 167 | lengths += (~has_stopped).int() 168 | 169 | # compute scores for each continuation 170 | ## recreate the score of the previous full sequence for all possible continuations 171 | logprobs_ = logprobs.view(batch_size * current_beam_size, 1).expand(batch_size * current_beam_size, beam_size) 172 | ## add the cost of each continuation 173 | logprobs_ = logprobs_ + new_logprobs 174 | ## average over the full sequence, ignoring padding items 175 | avg_logprobs = logprobs_ #/ lengths.unsqueeze(-1) 176 | ## select the `beam_size` best continuations overall, their matching scores will 
be `avg_logprobs` 177 | avg_logprobs, selected_beams = avg_logprobs.view(batch_size, current_beam_size * beam_size).topk(beam_size, dim=-1) 178 | ## select back the base score for the selected continuations 179 | logprobs = logprobs_.view(batch_size, current_beam_size * beam_size).gather(-1, selected_beams).view(batch_size, beam_size) 180 | 181 | # add symbols of best continuations 182 | ## recreate the full previous sequence for all possible continuations 183 | generated_symbols_ = generated_symbols.view(-1, batch_size * current_beam_size, 1).expand(-1, batch_size * current_beam_size, beam_size) 184 | ## stack on the new symbols 185 | generated_symbols_ = torch.cat([generated_symbols_, new_symbols.unsqueeze(0)], dim=0) 186 | ## grab only the `beam_size` best continuations out of all possible continuations 187 | generated_symbols_ = generated_symbols_.view(-1, batch_size, current_beam_size * beam_size) 188 | generated_symbols = generated_symbols_.gather(-1, selected_beams.unsqueeze(0).expand(step_idx + 1, batch_size, beam_size)).view(step_idx + 1, batch_size, beam_size) 189 | 190 | # recompute which beams have stopped, and what their lengths are 191 | ## reconstruct the lengths of all candidate continuations 192 | lengths = lengths.view(batch_size, current_beam_size, 1).expand(batch_size, current_beam_size, beam_size) 193 | ## retrieve the lengths of the selected beam continuations 194 | lengths = lengths.reshape(batch_size, current_beam_size * beam_size).gather(-1, selected_beams).view(-1) 195 | ## reconstruct the halting state of all candidate continuations 196 | has_stopped = has_stopped.view(batch_size, current_beam_size, 1).expand(batch_size, current_beam_size, beam_size) 197 | ## retrieve the halting states of selected beam continuations 198 | has_stopped = has_stopped.reshape(batch_size, current_beam_size * beam_size).gather(-1, selected_beams).view(-1) 199 | 200 | # flag which beams have terminated at the current step (i.e., whether they just produced an EOS) 201 | generated_symbols = generated_symbols.view(-1, batch_size * beam_size) 202 | generated_symbols[-1] = generated_symbols[-1].masked_fill(has_stopped, self.padding_idx) 203 | has_stopped = has_stopped | (generated_symbols.view(-1, batch_size * beam_size)[-1] == self.eos_idx).view(batch_size * beam_size) 204 | 205 | # recompute padding mask on the basis of which continuations were selected 206 | src_key_padding_mask = src_key_padding_mask.view(-1, batch_size, current_beam_size, 1).expand(-1, batch_size, current_beam_size, beam_size) 207 | src_key_padding_mask = src_key_padding_mask.reshape(-1, batch_size, current_beam_size * beam_size) 208 | src_key_padding_mask = src_key_padding_mask.gather(-1, selected_beams.unsqueeze(0).expand(step_idx + 1, batch_size, beam_size)).view(step_idx + 1, batch_size * beam_size) 209 | src_key_padding_mask = torch.cat([src_key_padding_mask, has_stopped.unsqueeze(0)], dim=0) 210 | 211 | # produce input for the next timestep 212 | src = torch.cat([vector_src.expand(1, beam_size, -1), self.embedding(generated_symbols)], dim=0) 213 | # reshape to the familiar format 214 | generated_symbols = generated_symbols.view(-1, batch_size, beam_size) 215 | 216 | # if all beams have stopped, so do we 217 | if has_stopped.all(): 218 | break 219 | # we update the number of sustained beams after the first iteration, since we now have `beam_size` candidates.
220 | current_beam_size = beam_size 221 | 222 | # select the most likely sequence for each batched item 223 | max_scores, selected_beams = (logprobs / lengths.view(batch_size, beam_size)).topk(1, dim=1) 224 | output_sequence = generated_symbols.gather(1, selected_beams.unsqueeze(0).expand(step_idx + 1, batch_size, 1)) 225 | if verbose: print(decode_fn(output_sequence.squeeze(-1))) 226 | return output_sequence.squeeze(-1) 227 | 228 | 229 | class RevdictModel(nn.Module): 230 | """A transformer architecture for the Reverse Dictionary task.""" 231 | 232 | def __init__( 233 | self, vocab, d_model=256, n_head=4, n_layers=4, dropout=0.3, maxlen=512 234 | ): 235 | super(RevdictModel, self).__init__() 236 | self.d_model = d_model 237 | self.padding_idx = vocab[data.PAD] 238 | self.eos_idx = vocab[data.EOS] 239 | self.maxlen = maxlen 240 | 241 | self.embedding = nn.Embedding(len(vocab), d_model, padding_idx=self.padding_idx) 242 | self.positional_encoding = PositionalEncoding( 243 | d_model, dropout=dropout, max_len=maxlen 244 | ) 245 | encoder_layer = nn.TransformerEncoderLayer( 246 | d_model=d_model, nhead=n_head, dropout=dropout, dim_feedforward=d_model * 2 247 | ) 248 | self.transformer_encoder = nn.TransformerEncoder( 249 | encoder_layer, num_layers=n_layers 250 | ) 251 | self.dropout = nn.Dropout(p=dropout) 252 | self.e_proj = nn.Linear(d_model, d_model) 253 | for name, param in self.named_parameters(): 254 | if param.dim() > 1: 255 | nn.init.xavier_uniform_(param) 256 | elif "bias" in name: 257 | nn.init.zeros_(param) 258 | else: # gain parameters of the layer norm 259 | nn.init.ones_(param) 260 | 261 | def forward(self, gloss_tensor): 262 | src_key_padding_mask = gloss_tensor == self.padding_idx 263 | embs = self.embedding(gloss_tensor) 264 | src = self.positional_encoding(embs) 265 | transformer_output = self.dropout( 266 | self.transformer_encoder(src, src_key_padding_mask=src_key_padding_mask.t()) 267 | ) 268 | summed_embs = transformer_output.masked_fill( 269 | src_key_padding_mask.unsqueeze(-1), 0 270 | ).sum(dim=0) 271 | return self.e_proj(F.relu(summed_embs)) 272 | 273 | @staticmethod 274 | def load(file): 275 | return torch.load(file) 276 | 277 | def save(self, file): 278 | torch.save(self, file) 279 | 280 | 281 | def linear_combination(x, y, epsilon): 282 | return epsilon * x + (1 - epsilon) * y 283 | 284 | 285 | def reduce_loss(loss, reduction="mean"): 286 | return ( 287 | loss.mean() 288 | if reduction == "mean" 289 | else loss.sum() 290 | if reduction == "sum" 291 | else loss 292 | ) 293 | 294 | 295 | # Implementation of label smoothing with CrossEntropy and ignore_index 296 | class LabelSmoothingCrossEntropy(nn.Module): 297 | def __init__(self, epsilon: float = 0.1, reduction="mean", ignore_index=-100): 298 | super().__init__() 299 | self.epsilon = epsilon 300 | self.reduction = reduction 301 | self.ignore_index = ignore_index 302 | 303 | def forward(self, preds, target): 304 | n = preds.size()[-1] 305 | log_preds = F.log_softmax(preds, dim=-1) 306 | loss = reduce_loss(-log_preds.sum(dim=-1), self.reduction) 307 | nll = F.nll_loss( 308 | log_preds, target, reduction=self.reduction, ignore_index=self.ignore_index 309 | ) 310 | return linear_combination(loss / n, nll, self.epsilon) 311 | -------------------------------------------------------------------------------- /baseline_archs/code/defmod.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import itertools 3 | import json 4 | import logging 5 | import pathlib 6 | import pprint
7 | import secrets 8 | 9 | import skopt 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torch.optim as optim 15 | from torch.utils.tensorboard import SummaryWriter 16 | 17 | import tqdm 18 | 19 | import data 20 | import models 21 | 22 | logger = logging.getLogger(pathlib.Path(__file__).name) 23 | logger.setLevel(logging.DEBUG) 24 | handler = logging.StreamHandler(tqdm.tqdm) 25 | handler.terminator = "" 26 | handler.setFormatter( 27 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s") 28 | ) 29 | logger.addHandler(handler) 30 | 31 | 32 | def get_parser( 33 | parser=argparse.ArgumentParser(description="Run a definition modeling baseline"), 34 | ): 35 | parser.add_argument( 36 | "--do_htune", 37 | action="store_true", 38 | help="whether to perform hyperparameter tuning", 39 | ) 40 | parser.add_argument( 41 | "--do_train", action="store_true", help="whether to train a model from scratch" 42 | ) 43 | parser.add_argument( 44 | "--do_pred", action="store_true", help="whether to produce predictions" 45 | ) 46 | parser.add_argument( 47 | "--train_file", type=pathlib.Path, help="path to the train file" 48 | ) 49 | parser.add_argument("--dev_file", type=pathlib.Path, help="path to the dev file") 50 | parser.add_argument("--test_file", type=pathlib.Path, help="path to the test file") 51 | parser.add_argument( 52 | "--device", 53 | type=torch.device, 54 | default=torch.device("cpu"), 55 | help="device on which to run computations", 56 | ) 57 | parser.add_argument( 58 | "--source_arch", 59 | type=str, 60 | default="sgns", 61 | choices=("sgns", "char", "electra"), 62 | help="embedding architecture to use as source", 63 | ) 64 | parser.add_argument( 65 | "--summary_logdir", 66 | type=pathlib.Path, 67 | default=pathlib.Path("logs") / "defmod-baseline", 68 | help="write logs for future analysis", 69 | ) 70 | parser.add_argument( 71 | "--save_dir", 72 | type=pathlib.Path, 73 | default=pathlib.Path("models") / "defmod-baseline", 74 | help="where to save model & vocab", 75 | ) 76 | parser.add_argument( 77 | "--spm_model_path", 78 | type=pathlib.Path, 79 | default=None, 80 | help="use a sentencepiece model; if required, train and save it here", 81 | ) 82 | parser.add_argument( 83 | "--pred_file", 84 | type=pathlib.Path, 85 | default=pathlib.Path("defmod-baseline-preds.json"), 86 | help="where to save predictions", 87 | ) 88 | return parser 89 | 90 | 91 | def get_search_space(): 92 | """get hyperparameters to optimize for""" 93 | search_space = [ 94 | skopt.space.Real(1e-8, 1.0, "log-uniform", name="learning_rate"), 95 | skopt.space.Real(0.0, 1.0, "uniform", name="weight_decay"), 96 | skopt.space.Real(0.9, 1.0 - 1e-8, "log-uniform", name="beta_a"), 97 | skopt.space.Real(0.9, 1.0 - 1e-8, "log-uniform", name="beta_b"), 98 | skopt.space.Real(0.0, 0.9, "uniform", name="dropout"), 99 | skopt.space.Real(0.0, 1.0, "uniform", name="warmup_len"), 100 | skopt.space.Real(0.0, 1.0 - 1e-8, "uniform", name="label_smoothing"), 101 | skopt.space.Integer(1, 100, "log-uniform", name="batch_accum"), 102 | skopt.space.Integer(0, 5, "uniform", name="n_head_pow"), 103 | skopt.space.Integer(1, 6, "uniform", name="n_layers"), 104 | ] 105 | return search_space 106 | 107 | 108 | def train( 109 | train_file, 110 | dev_file, 111 | source_arch="sgns", 112 | summary_logdir=pathlib.Path("logs") / "defmod-htune", 113 | save_dir=pathlib.Path("models") / "defmod-baseline", 114 | device="cuda:0", 115 | spm_model_path=None, 116 | epochs=100, 117 | learning_rate=1e-4, 118 | beta1=0.9, 119 |
beta2=0.999, 120 | weight_decay=1e-6, 121 | patience=5, 122 | batch_accum=1, 123 | dropout=0.3, 124 | warmup_len=0.1, 125 | label_smoothing=0.1, 126 | n_head=4, 127 | n_layers=4, 128 | ): 129 | assert train_file is not None, "Missing dataset for training" 130 | assert dev_file is not None, "Missing dataset for development" 131 | 132 | # 1. get data, vocabulary, summary writer 133 | logger.debug("Preloading data") 134 | save_dir = save_dir / source_arch 135 | save_dir.mkdir(parents=True, exist_ok=True) 136 | ## make datasets 137 | train_dataset = data.get_train_dataset(train_file, spm_model_path, save_dir) 138 | dev_dataset = data.get_dev_dataset( 139 | dev_file, spm_model_path, save_dir, train_dataset 140 | ) 141 | ## assert they correspond to the task 142 | assert train_dataset.has_gloss, "Training dataset contains no gloss." 143 | if source_arch == "electra": 144 | assert train_dataset.has_electra, "Training dataset contains no vector." 145 | else: 146 | assert train_dataset.has_vecs, "Training dataset contains no vector." 147 | assert dev_dataset.has_gloss, "Development dataset contains no gloss." 148 | if source_arch == "electra": 149 | assert dev_dataset.has_electra, "Development dataset contains no vector." 150 | else: 151 | assert dev_dataset.has_vecs, "Development dataset contains no vector." 152 | ## make dataloader 153 | train_dataloader = data.get_dataloader(train_dataset) 154 | dev_dataloader = data.get_dataloader(dev_dataset, shuffle=False) 155 | ## make summary writer 156 | summary_writer = SummaryWriter(summary_logdir) 157 | train_step = itertools.count() # to keep track of the training steps for logging 158 | 159 | # 2. construct model 160 | logger.debug("Setting up training environment") 161 | model = models.DefmodModel( 162 | dev_dataset.vocab, n_head=n_head, n_layers=n_layers, dropout=dropout 163 | ) 164 | model = model.to(device) 165 | model.train() 166 | 167 | # 3. declare optimizer & criterion 168 | ## Hyperparams 169 | optimizer = optim.AdamW( 170 | model.parameters(), 171 | lr=learning_rate, 172 | betas=(beta1, beta2), 173 | weight_decay=weight_decay, 174 | ) 175 | xent_criterion = nn.CrossEntropyLoss(ignore_index=model.padding_idx) 176 | if label_smoothing > 0.0: 177 | smooth_criterion = models.LabelSmoothingCrossEntropy( 178 | ignore_index=model.padding_idx, epsilon=label_smoothing 179 | ) 180 | else: 181 | smooth_criterion = xent_criterion 182 | 183 | vec_tensor_key = f"{source_arch}_tensor" 184 | best_xent = float("inf") 185 | strikes = 0 186 | 187 | # 4.
train model 188 | epochs_range = tqdm.trange(epochs, desc="Epochs") 189 | total_steps = (len(train_dataloader) * epochs) // batch_accum 190 | scheduler = models.get_schedule( 191 | optimizer, round(total_steps * warmup_len), total_steps 192 | ) 193 | for epoch in epochs_range: 194 | ## train loop 195 | pbar = tqdm.tqdm( 196 | desc=f"Train {epoch}", total=len(train_dataset), disable=None, leave=False 197 | ) 198 | optimizer.zero_grad() 199 | for i, batch in enumerate(train_dataloader): 200 | vec = batch[vec_tensor_key].to(device) 201 | gls = batch["gloss_tensor"].to(device) 202 | pred = model(vec, gls[:-1]) 203 | loss = smooth_criterion(pred.view(-1, pred.size(-1)), gls.view(-1)) 204 | loss.backward() 205 | grad_remains = True 206 | step = next(train_step) 207 | if i % batch_accum == 0: 208 | optimizer.step() 209 | scheduler.step() 210 | optimizer.zero_grad() 211 | grad_remains = False 212 | summary_writer.add_scalar( 213 | "defmod-train/lr", scheduler.get_last_lr()[0], step 214 | ) 215 | # keep track of the train loss for this step 216 | with torch.no_grad(): 217 | tokens = gls != model.padding_idx 218 | acc = ( 219 | ((pred.argmax(-1) == gls) & tokens).float().sum() / tokens.sum() 220 | ).item() 221 | xent_unsmoothed = xent_criterion( 222 | pred.view(-1, pred.size(-1)), gls.view(-1) 223 | ) 224 | summary_writer.add_scalar("defmod-train/xent_smooth", loss.item(), step) 225 | summary_writer.add_scalar("defmod-train/xent", xent_unsmoothed, step) 226 | summary_writer.add_scalar("defmod-train/acc", acc, step) 227 | pbar.update(vec.size(0)) 228 | if grad_remains: 229 | optimizer.step() 230 | scheduler.step() 231 | optimizer.zero_grad() 232 | pbar.close() 233 | ## eval loop 234 | model.eval() 235 | with torch.no_grad(): 236 | sum_dev_loss = 0.0 237 | sum_acc = 0 238 | ntoks = 0 239 | pbar = tqdm.tqdm( 240 | desc=f"Eval {epoch}", 241 | total=len(dev_dataset), 242 | disable=None, 243 | leave=False, 244 | ) 245 | for batch in dev_dataloader: 246 | vec = batch[vec_tensor_key].to(device) 247 | gls = batch["gloss_tensor"].to(device) 248 | pred = model(vec, gls[:-1]) 249 | sum_dev_loss += F.cross_entropy( 250 | pred.view(-1, pred.size(-1)), 251 | gls.view(-1), 252 | reduction="sum", 253 | ignore_index=model.padding_idx, 254 | ).item() 255 | tokens = gls != model.padding_idx 256 | ntoks += tokens.sum().item() 257 | sum_acc += ((pred.argmax(-1) == gls) & tokens).sum().item() 258 | pbar.update(vec.size(0)) 259 | 260 | # keep track of the average loss & acc on dev set for this epoch 261 | new_xent = sum_dev_loss / ntoks 262 | summary_writer.add_scalar("defmod-dev/xent", new_xent, epoch) 263 | summary_writer.add_scalar("defmod-dev/acc", sum_acc / ntoks, epoch) 264 | pbar.close() 265 | if new_xent < (best_xent * 0.999): 266 | logger.debug( 267 | f"Epoch {epoch}, new best loss: {new_xent:.4f} < {best_xent:.4f}" 268 | + f" (x 0.999 = {best_xent * 0.999:.4f})" 269 | ) 270 | best_xent = new_xent 271 | strikes = 0 272 | else: 273 | strikes += 1 274 | # check result if better 275 | if not (save_dir / "best_scores.txt").is_file(): 276 | overall_best_xent = float("inf") 277 | else: 278 | with open(save_dir / "best_scores.txt", "r") as score_file: 279 | overall_best_xent = float(score_file.read()) 280 | # save result if better 281 | if new_xent < overall_best_xent: 282 | logger.debug( 283 | f"Epoch {epoch}, new overall best loss: {new_xent:.4f} < {overall_best_xent:.4f}" 284 | ) 285 | model.save(save_dir / "model.pt") 286 | with open(save_dir / "hparams.json", "w") as json_file: 287 | hparams = { 288 | 
"learning_rate": learning_rate, 289 | "beta1": beta1, 290 | "beta2": beta2, 291 | "weight_decay": weight_decay, 292 | } 293 | json.dump(hparams, json_file, indent=2) 294 | with open(save_dir / "best_scores.txt", "w") as score_file: 295 | print(new_xent, file=score_file) 296 | 297 | if strikes >= patience: 298 | logger.debug("Stopping early.") 299 | epochs_range.close() 300 | break 301 | model.train() 302 | # return loss for gp minimize 303 | return best_xent 304 | 305 | 306 | def pred(args): 307 | assert args.test_file is not None, "Missing dataset for test" 308 | # 1. retrieve vocab, dataset, model 309 | model = models.DefmodModel.load(args.save_dir / "model.pt") 310 | train_vocab = data.JSONDataset.load(args.save_dir / "train_dataset.pt").vocab 311 | test_dataset = data.JSONDataset( 312 | args.test_file, vocab=train_vocab, freeze_vocab=True, maxlen=model.maxlen, spm_model_name=args.spm_model_path 313 | ) 314 | test_dataloader = data.get_dataloader(test_dataset, shuffle=False, batch_size=1) 315 | model.eval() 316 | vec_tensor_key = f"{args.source_arch}_tensor" 317 | if args.source_arch == "electra": 318 | assert test_dataset.has_electra, "File is not usable for the task" 319 | else: 320 | assert test_dataset.has_vecs, "File is not usable for the task" 321 | # 2. make predictions 322 | predictions = [] 323 | with torch.no_grad(): 324 | pbar = tqdm.tqdm(desc="Pred.", total=len(test_dataset), disable=None) 325 | for batch in test_dataloader: 326 | sequence = model.pred(batch[vec_tensor_key].to(args.device), decode_fn=test_dataset.decode, verbose=False) 327 | for id, gloss in zip(batch["id"], test_dataset.decode(sequence)): 328 | predictions.append({"id": id, "gloss": gloss}) 329 | pbar.update(batch[vec_tensor_key].size(0)) 330 | pbar.close() 331 | # 3. 
dump predictions 332 | with open(args.pred_file, "w") as ostr: 333 | json.dump(predictions, ostr) 334 | 335 | 336 | def main(args): 337 | assert not (args.do_train and args.do_htune), "Conflicting options" 338 | if args.do_train: 339 | logger.debug("Performing defmod training") 340 | train( 341 | args.train_file, 342 | args.dev_file, 343 | args.source_arch, 344 | args.summary_logdir, 345 | args.save_dir, 346 | args.device, 347 | ) 348 | elif args.do_htune: 349 | logger.debug("Performing defmod hyperparameter tuning") 350 | search_space = get_search_space() 351 | 352 | @skopt.utils.use_named_args(search_space) 353 | def gp_train(**hparams): 354 | logger.debug(f"Hyperparams sampled:\n{pprint.pformat(hparams)}") 355 | best_loss = train( 356 | train_file=args.train_file, 357 | dev_file=args.dev_file, 358 | source_arch=args.source_arch, 359 | summary_logdir=args.summary_logdir 360 | / args.source_arch 361 | / secrets.token_urlsafe(8), 362 | save_dir=args.save_dir, 363 | device=args.device, 364 | spm_model_path=args.spm_model_path, 365 | learning_rate=hparams["learning_rate"], 366 | beta1=min(hparams["beta_a"], hparams["beta_b"]), 367 | beta2=max(hparams["beta_a"], hparams["beta_b"]), 368 | weight_decay=hparams["weight_decay"], 369 | batch_accum=hparams["batch_accum"], 370 | warmup_len=hparams["warmup_len"], 371 | label_smoothing=hparams["label_smoothing"], 372 | n_head=2 ** hparams["n_head_pow"], 373 | n_layers=hparams["n_layers"], 374 | ) 375 | return best_loss 376 | 377 | result = skopt.gp_minimize(gp_train, search_space) 378 | args.save_dir = args.save_dir / args.source_arch 379 | skopt.dump(result, args.save_dir / "results.pkl", store_objective=False) 380 | 381 | if args.do_pred: 382 | logger.debug("Performing defmod prediction") 383 | pred(args) 384 | 385 | 386 | if __name__ == "__main__": 387 | args = get_parser().parse_args() 388 | main(args) 389 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Comparing Dictionaries and Word Embeddings 2 | 3 | This is the repository for the SemEval 2022 Shared Task #1: Comparing 4 | Dictionaries and Word Embeddings (CODWOE). 5 | 6 | This repository currently contains: the configuration for the codalab 7 | competition, a Docker image to reproduce the environment, a scorer, a 8 | format-checker, and baseline programs to help participants get started. 9 | 10 | Participants may be interested in the script `codwoe_entrypoint.py`. It provides 11 | a number of useful features, such as scoring submissions, checking submission 12 | formats, and running a few simple baseline architectures. It is also an exact 13 | copy of what is used on the codalab. 14 | 15 | **Datasets are no longer provided directly on this repository. The competition datasets are now available on this page: [https://codwoe.atilf.fr/](https://codwoe.atilf.fr/).** 16 | 17 | # What is this task? 18 | The CODWOE shared task invites you to compare two types of semantic 19 | descriptions: dictionary glosses and word embedding representations. Are these 20 | two types of representation equivalent? Can we generate one from the other? To 21 | study this question, we propose two subtracks: a **definition modeling** track 22 | (Noraset et al., 2017), where participants have to generate glosses from 23 | vectors, and a **reverse dictionary** track (Hill et al., 2016), where 24 | participants have to generate vectors from glosses. A minimal sketch of the two directions is given below.
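To make the two directions concrete, here is that sketch. It is not part of the toolkit: the `generate_gloss` and `embed` functions are hypothetical placeholders, and the 256-dimension embedding is arbitrary; only the example's JSON layout follows the dataset format described further down this README.

```python
from typing import List

def generate_gloss(vector: List[float]) -> str:
    """Hypothetical definition modeling system: embedding in, gloss out."""
    return "a placeholder definition"

def embed(gloss: str) -> List[float]:
    """Hypothetical reverse dictionary system: gloss in, embedding out."""
    return [0.0] * 256

# one dataset example, in the JSON layout detailed below
example = {
    "id": "en.train.2",
    "gloss": "A vocal genre in Hindustani classical music",
    "sgns": [0.0] * 256,
}

predicted_gloss = generate_gloss(example["sgns"])  # definition modeling track
predicted_vector = embed(example["gloss"])         # reverse dictionary track
```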
25 | 26 | These two tracks display a number of interesting characteristics. Definition 27 | modeling is a vector-to-sequence task, while the reverse dictionary task is a 28 | sequence-to-vector task—and you know that kind of thing gets NLP people swearing 29 | out loud. These tasks are also useful for explainable AI, since they involve 30 | converting human-readable data into machine-readable data and back. 31 | 32 | To get involved: check out the 33 | [codalab competition](https://competitions.codalab.org/competitions/34022). 34 | There is also a participants' 35 | ["semeval2022-dictionaries-and-word-embeddings" google group](mailto:semeval2022-dictionaries-and-word-embeddings@googlegroups.com), 36 | as well as a [discord server](https://discord.gg/y8g6qXakNs). 37 | You can reach the organizers through [this email](mailto:tmickus@atilf.fr); make 38 | sure to mention SemEval in your email subject. 39 | 40 | # How hard is it? 41 | 42 | ## Official rankings 43 | 44 | Below are the official rankings for the SemEval 2022 CODWOE Shared Task. 45 | More information about the submissions we received is available in this repository (see the `rankings/` sub-directory). 46 | 47 | ### Definition Modeling track 48 | 49 | Below are the results for the Definition Modeling track. 50 | 51 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU 52 | |---------------|--------:|--------:|--------:|--------:|--------: 53 | | Locchi | 8 | 6 | | 7 | 54 | | LingJing | 9 | 7 | 6 | 6 | 6 55 | | BLCU-ICALL | 3 | 2 | 3 | **1** | 2 56 | | IRB-NLP | 2 | **1** | **1** | 5 | 5 57 | | emukans | 5 | 4 | 4 | 4 | 3 58 | | guntis | 6 | | | | 59 | | lukechan1231 | 7 | 5 | 5 | 3 | 4 60 | | pzchen | 4 | 3 | 2 | 2 | **1** 61 | | talent404 | **1** | | | | 62 | 63 | ### Reverse Dictionary track 64 | 65 | Below are the results for the Reverse Dictionary track. 66 | There are separate rankings, based on which targets participants have submitted. 67 | 68 | 69 | #### A. SGNS targets 70 | 71 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU 72 | |------------------|--------:|--------:|--------:|--------:|--------: 73 | | Locchi | 4 | | | 4 | 74 | | BL.Research | 5 | 5 | 4 | 6 | 4 75 | | LingJing | **1** | 2 | 2 | 3 | **1** 76 | | MMG | | 3 | | | 77 | | chlrbgus321 | N/A | | | | 78 | | IRB-NLP | 3 | **1** | **1** | **1** | 2 79 | | pzchen | 2 | 4 | 3 | 2 | 3 80 | | the0ne | 7 | | | | 81 | | JSI | 8 | 7 | 6 | 7 | 6 82 | | zhwa3087 | 6 | 6 | 5 | 5 | 5 83 | 84 | #### B. ELECTRA targets 85 | 86 | | user / team | Rank EN | Rank FR | Rank RU 87 | |------------------|--------:|--------:|--------: 88 | | Locchi | 3 | | 89 | | BL.Research | 2 | 2 | 4 90 | | LingJing | 4 | 4 | 2 91 | | IRB-NLP | 5 | 3 | 3 92 | | pzchen | **1** | **1** | **1** 93 | | the0ne | 6 | | 94 | 95 | 96 | #### C. Char-based targets 97 | 98 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU 99 | |------------------|--------:|--------:|--------:|--------:|--------: 100 | | Locchi | **1** | | | 4 | 101 | | BL.Research | 2 | 2 | 2 | 3 | 4 102 | | LingJing | 7 | 5 | 5 | 6 | 5 103 | | IRB-NLP | 4 | 3 | 4 | 2 | 2 104 | | pzchen | 3 | **1** | **1** | **1** | **1** 105 | | the0ne | 5 | | | | 106 | | zhwa3087 | 6 | 4 | 3 | 5 | 3 107 | 108 | 109 | 110 | ## Baseline results 111 | Here are baseline results on the development set for the two tracks. 112 | We used the code described in `baseline_archs/code` to generate these scores. 113 | 114 | For the Reverse Dictionary track results, rows will correspond to different targets.
115 | On the other hand, rows of the Definition Modeling table below correspond to different inputs to the system.
116 | Scores were computed using the scoring script provided in this repository (`code/score.py`).
117 | 
118 | ### Reverse Dictionary track
119 | 
120 | |            | MSE     | Cosine  | Ranking
121 | |------------|--------:|--------:|--------:
122 | | en SGNS    | 0.91092 | 0.15132 | 0.49030
123 | | en char    | 0.14776 | 0.79006 | 0.50218
124 | | en electra | 1.41287 | 0.84283 | 0.49849
125 | | es SGNS    | 0.92996 | 0.20406 | 0.49912
126 | | es char    | 0.56952 | 0.80634 | 0.49778
127 | | fr SGNS    | 1.14050 | 0.19774 | 0.49052
128 | | fr char    | 0.39480 | 0.75852 | 0.49945
129 | | fr electra | 1.15348 | 0.85629 | 0.49784
130 | | it SGNS    | 1.12536 | 0.20430 | 0.47692
131 | | it char    | 0.36309 | 0.72732 | 0.49663
132 | | ru SGNS    | 0.57683 | 0.25316 | 0.49008
133 | | ru char    | 0.13498 | 0.82624 | 0.49451
134 | | ru electra | 0.87358 | 0.72086 | 0.49120
135 | 
136 | 
137 | ### Definition Modeling track
138 | 
139 | |            | Sense-BLEU | Lemma-BLEU | MoverScore
140 | |------------|-----------:|-----------:|-----------:
141 | | en SGNS    | 0.03048 | 0.04062 | 0.08307
142 | | en char    | 0.02630 | 0.03359 | 0.04531
143 | | en electra | 0.03155 | 0.04155 | 0.06732
144 | | es SGNS    | 0.03528 | 0.05273 | 0.06685
145 | | es char    | 0.03291 | 0.04712 | 0.06112
146 | | fr SGNS    | 0.02983 | 0.04134 | 0.04036
147 | | fr char    | 0.02913 | 0.03985 | 0.01935
148 | | fr electra | 0.03061 | 0.03954 | 0.03855
149 | | it SGNS    | 0.04759 | 0.06910 | 0.10154
150 | | it char    | 0.02532 | 0.03522 | 0.04068
151 | | ru SGNS    | 0.03805 | 0.05121 | 0.11559
152 | | ru char    | 0.02324 | 0.03238 | 0.07145
153 | | ru electra | 0.02987 | 0.03782 | 0.10382
154 | 
155 | # Using this repository
156 | To install the exact environment used for our scripts, see the
157 | `requirements.txt` file, which lists the libraries we used. Do
158 | note that the installation used in the competition underwent supplementary
159 | tweaks: in particular, we patched the moverscore library so that it runs on CPU.
160 | 
161 | Another possibility is to use the Dockerfile written for the codalab
162 | competition. You can also pull this Docker image from dockerhub:
163 | [`linguistickus/codwoe`](https://hub.docker.com/r/linguistickus/codwoe). This
164 | Docker image doesn't contain the code, so you will also need to clone the
165 | repository within it; the image does, however, contain our tweaks.
166 | 
167 | Code useful to participants is stored in the `code/` directory.
168 | To see options for a simple baseline on the definition modeling track, use:
169 | ```sh
170 | $ python3 code/codwoe_entrypoint.py defmod --help
171 | ```
172 | To see options for a simple baseline on the reverse dictionary track, use:
173 | ```sh
174 | $ python3 code/codwoe_entrypoint.py revdict --help
175 | ```
176 | To verify the format of a submission, run:
177 | ```sh
178 | $ python3 code/codwoe_entrypoint.py check-format $PATH_TO_SUBMISSION_FILE
179 | ```
180 | To score a submission, use:
181 | ```sh
182 | $ python3 code/codwoe_entrypoint.py score $PATH_TO_SUBMISSION_FILE --reference_files_dir $PATH_TO_DATA_DIR
183 | ```
184 | Note that this requires the gold files, which were not available at the start of the
185 | competition.
186 | 
187 | Other useful files to look at include `code/models.py`, where our baseline
188 | architectures are defined, and `code/data.py`, which shows how to use the JSON
189 | datasets with the PyTorch dataset API.
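To give the flavor of that approach, here is a minimal, unofficial sketch of wrapping one CODWOE JSON split in a PyTorch `Dataset` (the actual implementation in `code/data.py` differs; the class name and file path below are placeholders):

```python
import json

import torch
from torch.utils.data import Dataset


class CodwoeJsonDataset(Dataset):
    """Minimal wrapper around one CODWOE JSON split (a sketch, not the official code)."""

    def __init__(self, json_file):
        # Each split is a JSON list of examples (see "Using the datasets" below).
        with open(json_file, "r") as file_handler:
            self.items = json.load(file_handler)

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        item = self.items[index]
        # Glosses are raw strings; embedding values are lists of floats.
        return {
            "id": item["id"],
            "gloss": item["gloss"],
            "sgns": torch.tensor(item["sgns"]),
            "char": torch.tensor(item["char"]),
        }
```

A `torch.utils.data.DataLoader` with an appropriate `collate_fn` can then take care of batching and of padding the variable-length glosses.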
190 | 
191 | # Using the datasets
192 | 
193 | **Datasets are no longer provided directly in this repository. The competition datasets are now available on this page: [https://codwoe.atilf.fr/](https://codwoe.atilf.fr/).**
194 | 
195 | This section details the structure of the JSON dataset files we provide. More information is available on the competition website: [link](https://competitions.codalab.org/competitions/34022#participate-get_data).
196 | 
197 | ### Brief Overview
198 | 
199 | As an overview, the expected usage of the datasets is as follows:
200 | + In the Definition Modeling track, we expect participants to use the embeddings ("char", "sgns", "electra") to generate the associated definition ("gloss").
201 | + In the Reverse Dictionary track, we expect participants to use the definition ("gloss") to generate any of the associated embeddings ("char", "sgns", "electra").
202 | 
203 | 
204 | ### Dataset files structure
205 | 
206 | Each dataset file corresponds to a data split (trial/train/dev/test) for one of the languages.
207 | 
208 | Dataset files are in the JSON format. A dataset file contains a list of examples. Each example is a JSON dictionary, containing the following keys:
209 | + "id"
210 | + "gloss"
211 | + "sgns"
212 | + "char"
213 | 
214 | The English, French and Russian datasets also contain an "electra" key.
215 | 
216 | As a concrete instance, here is an example from the English training dataset:
217 | ```json
218 | {
219 |   "id": "en.train.2",
220 |   "gloss": "A vocal genre in Hindustani classical music",
221 |   "sgns": [
222 |     -0.0602365807,
223 |     ...
224 |   ],
225 |   "char": [
226 |     -0.3631578386,
227 |     ...
228 |   ],
229 |   "electra": [
230 |     -1.3904430866,
231 |     ...
232 |   ]
233 | },
234 | ```
235 | 
236 | ### Description of contents
237 | 
238 | The value associated with "id" encodes the language, the data split and a unique identifier for this example.
239 | 
240 | The value associated with the "gloss" key is a definition, as you would find in a classical dictionary. It is to be used either as the target in the Definition Modeling track, or as the source in the Reverse Dictionary track.
241 | 
242 | All other keys ("char", "sgns", "electra") correspond to embeddings, and the associated values are arrays of floats representing the components. They can all serve as targets for the Reverse Dictionary track.
243 | + "char" corresponds to character-based embeddings, computed using an auto-encoder on the spelling of a word.
244 | + "sgns" corresponds to skip-gram with negative sampling embeddings (a.k.a. word2vec).
245 | + "electra" corresponds to Transformer-based contextualized embeddings.
246 | 
247 | 
248 | ### Using the dataset files
249 | 
250 | Given that the data is in JSON format, it is straightforward to load it in Python:
251 | 
252 | ```python
253 | import json
254 | with open(PATH_TO_DATASET, "r") as file_handler:
255 |     dataset = json.load(file_handler)
256 | ```
257 | 
258 | A more complete example for PyTorch is available in the git repository (see here: [link](https://git.atilf.fr/tmickus/codwoe/-/blob/master/code/data.py#L18)).
259 | 
260 | ### Expected output format
261 | 
262 | During the evaluation phase, we will expect submissions to reconstruct the same JSON format.
263 | 
264 | The test JSON files for input will be separate for each track. They will contain the "id" key, and either the "gloss" key (in the reverse dictionary track) or the embedding keys ("char" and "sgns", plus "electra" in EN/FR/RU; in the definition modeling track).
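For instance, a test item for each track might look as follows (a purely hypothetical illustration: the ids and values below are made up, and the embedding arrays are truncated):

```python
# Hypothetical reverse dictionary test item: gloss in, embedding expected out.
revdict_test_item = {
    "id": "en.test.1",
    "gloss": "A small domesticated carnivorous mammal",
}

# Hypothetical definition modeling test item: embeddings in, gloss expected out.
defmod_test_item = {
    "id": "en.test.1",
    "sgns": [-0.06, 0.12],     # truncated
    "char": [-0.36, 0.08],     # truncated
    "electra": [-1.39, 0.45],  # EN/FR/RU only; truncated
}
```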
265 | 
266 | In the definition modeling track, participants should construct JSON files that contain at least the two following keys:
267 | + the original "id"
268 | + their generated "gloss"
269 | 
270 | In the reverse dictionary track, participants should construct JSON files that contain at least the two following keys:
271 | + the original "id"
272 | + any of the valid embeddings ("char", "sgns", or, in EN/FR/RU, "electra")
273 | 
274 | Other keys can be added. More details concerning the evaluation procedure are available here: [link](https://competitions.codalab.org/competitions/34022#learn_the_details-evaluation).
275 | 
276 | 
277 | # License and citation
278 | 
279 | The code and data of this shared task are provided under a CC-BY-SA license.
280 | If you use this repository in your research, please cite the task paper:
281 | ```
282 | @inproceedings{mickus-etal-2022-semeval,
283 |     title = "{S}emeval-2022 Task 1: {CODWOE} {--} Comparing Dictionaries and Word Embeddings",
284 |     author = "Mickus, Timothee  and
285 |       Van Deemter, Kees  and
286 |       Constant, Mathieu  and
287 |       Paperno, Denis",
288 |     booktitle = "Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)",
289 |     month = jul,
290 |     year = "2022",
291 |     address = "Seattle, United States",
292 |     publisher = "Association for Computational Linguistics",
293 |     url = "https://aclanthology.org/2022.semeval-1.1",
294 |     pages = "1--14",
295 |     abstract = "Word embeddings have advanced the state of the art in NLP across numerous tasks. Understanding the contents of dense neural representations is of utmost interest to the computational semantics community. We propose to focus on relating these opaque word vectors with human-readable definitions, as found in dictionaries. This problem naturally divides into two subtasks: converting definitions into embeddings, and converting embeddings into definitions.
This task was conducted in a multilingual setting, using comparable sets of embeddings trained homogeneously.", 296 | } 297 | ``` 298 | 299 | Also consider looking at participants' submissions: 300 | - [1Cademy at Semeval-2022 Task 1: Investigating the Effectiveness of Multilingual, Multitask, and Language-Agnostic Tricks for the Reverse Dictionary Task](https://aclanthology.org/2022.semeval-1.2/) 301 | - [BLCU-ICALL at SemEval-2022 Task 1: Cross-Attention Multitasking Framework for Definition Modeling](https://aclanthology.org/2022.semeval-1.3/) 302 | - [LingJing at SemEval-2022 Task 1: Multi-task Self-supervised Pre-training for Multilingual Reverse Dictionary](https://aclanthology.org/2022.semeval-1.4/) 303 | - [IRB-NLP at SemEval-2022 Task 1: Exploring the Relationship Between Words and Their Semantic Representations](https://aclanthology.org/2022.semeval-1.5/) 304 | - [TLDR at SemEval-2022 Task 1: Using Transformers to Learn Dictionaries and Representations](https://aclanthology.org/2022.semeval-1.6/) 305 | - [MMG at SemEval-2022 Task 1: A Reverse Dictionary approach based on a review of the dataset from a lexicographic perspective](https://aclanthology.org/2022.semeval-1.7/) 306 | - [Edinburgh at SemEval-2022 Task 1: Jointly Fishing for Word Embeddings and Definitions](https://aclanthology.org/2022.semeval-1.8/) 307 | - [RIGA at SemEval-2022 Task 1: Scaling Recurrent Neural Networks for CODWOE Dictionary Modeling](https://aclanthology.org/2022.semeval-1.9/) 308 | - [Uppsala University at SemEval-2022 Task 1: Can Foreign Entries Enhance an English Reverse Dictionary?](https://aclanthology.org/2022.semeval-1.10/) 309 | - [BL.Research at SemEval-2022 Task 1: Deep networks for Reverse Dictionary using embeddings and LSTM autoencoders](https://aclanthology.org/2022.semeval-1.11/) 310 | - [JSI at SemEval-2022 Task 1: CODWOE - Reverse Dictionary: Monolingual and cross-lingual approaches](https://aclanthology.org/2022.semeval-1.12/) 311 | -------------------------------------------------------------------------------- /rankings/submission_ranks/results_revdict-electra-rankings.csv: -------------------------------------------------------------------------------- 1 | user,EN MSE,EN cos,EN rank,FR MSE,FR cos,FR rank,RU MSE,RU cos,RU rank,Comments,Date,filename 2 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:09.048119+00:00,fr.test.revdict.zip 3 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:11.803362+00:00,fr.test.revdict.zip 4 | pzchen,,,,,,,,,,baseline-sgns-$LANG,2022-02-01 12:04:13.848370+00:00,baseline-out-sgns.zip 5 | pzchen,,,,,,,,,,ae-skip-share-sgns-$LANG,2022-02-01 12:04:16.166846+00:00,ae-skip-share-out-sgns.zip 6 | tthhanh,,,,,,,,,,,2022-02-01 12:04:19.286712+00:00,revdict-baseline-preds.zip 7 | pzchen,12.0,15.0,17.0,10.0,11.0,16.0,11.0,12.0,18.0,baseline-electra-$LANG,2022-02-01 12:04:21.767316+00:00,baseline-out-electra.zip 8 | Nihed_Bendahman_,,,,22.0,22.0,22.0,,,,,2022-02-01 12:04:23.848808+00:00,fr.test.revdict.zip 9 | pzchen,,,,,,,,,,baseline-char-$LANG,2022-02-01 12:04:26.228123+00:00,baseline-out-char.zip 10 | pzchen,,,,,,,,,,ae-skip-share-char-$LANG,2022-02-01 12:04:30.244614+00:00,ae-skip-share-out-char.zip 11 | pzchen,6.0,9.0,15.0,5.0,5.0,10.0,6.0,6.0,14.0,ae-skip-share-electra-$LANG,2022-02-01 12:04:33.077318+00:00,ae-skip-share-out-electra.zip 12 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:35.657573+00:00,en.test.revdict.zip 13 | Nihed_Bendahman_,,,,8.0,9.0,22.0,,,,,2022-02-01 12:04:37.804439+00:00,fr.test.revdict.zip 14 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 
12:04:40.219400+00:00,en.test.revdict.zip 15 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:44.200059+00:00,es.test.revdict.zip 16 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:46.487460+00:00,ru.test.revdict.zip 17 | emukans,,,,,,,,,,,2022-02-01 12:04:48.831698+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-electra.json.zip 18 | Nihed_Bendahman_,9.0,11.0,23.0,,,,,,,,2022-02-01 12:04:50.938249+00:00,en.test.revdict.zip 19 | Nihed_Bendahman_,,,,,,,8.0,10.0,21.0,,2022-02-01 12:04:52.935214+00:00,ru.test.revdict.zip 20 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:05:25.788336+00:00,it.test.revdict.zip 21 | Locchi,,,,,,,,,,test,2022-02-01 12:05:28.772699+00:00,en.test.revdict.predicted.json.zip 22 | lukechan1231,,,,,,,,,,,2022-02-01 12:05:30.881540+00:00,en.char.test.defmod.json.zip 23 | emukans,,,,,,,,,,,2022-02-01 12:05:35.184513+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_no_early-en-sgns.json.zip 24 | pzchen,4.0,2.0,12.0,2.0,2.0,13.0,3.0,2.0,10.0,revdict-ensemble1-$EMBED-$LANG,2022-02-01 12:05:37.277011+00:00,ensemble1.zip 25 | emukans,,,,,,,,,,,2022-02-01 12:05:39.220654+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-electra.json.zip 26 | emukans,,,,,,,,,,,2022-02-01 12:05:41.571356+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-electra.json.zip 27 | emukans,,,,,,,,,,,2022-02-01 12:05:43.780984+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-sgns.json.zip 28 | emukans,,,,,,,,,,,2022-02-01 12:05:47.188910+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-char.json.zip 29 | emukans,,,,,,,,,,GRU CSLP 30 length SGNS,2022-02-01 12:05:49.582018+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_cslp-sgns.json.zip 30 | emukans,,,,,,,,,,GRU CS 30 length SGNS,2022-02-01 12:05:51.393164+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_cs-sgns.json.zip 31 | emukans,,,,,,,,,,GRU C 30 length SGNS,2022-02-01 12:05:53.548313+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_c-sgns.json.zip 32 | emukans,,,,,,,,,,GRU 2 layers 3072 50 length SGNS,2022-02-01 12:05:55.512222+00:00,defmod-gru_ulbroka_tokenizer_2_layers_3072_hidden_50_len-en-sgns.json.zip 33 | emukans,,,,,,,,,,GRU 2 layers 1024 30 length SGNS,2022-02-01 12:05:58.681696+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden-en-sgns.json.zip 34 | emukans,,,,,,,,,,GRU 2 layers 512 30 length SGNS,2022-02-01 12:06:01.098634+00:00,defmod-gru_ulbroka_tokenizer_2_layers_512_hidden-en-sgns.json.zip 35 | emukans,,,,,,,,,,GRU 2 layers 4096 30 length SGNS,2022-02-01 12:06:03.357951+00:00,defmod-gru_ulbroka_tokenizer_2_layers_4096_hidden-en-sgns.json.zip 36 | emukans,,,,,,,,,,GRU 4 layers 2048 30 length SGNS,2022-02-01 12:06:05.612675+00:00,defmod-gru_ulbroka_tokenizer_4_layers_2048_hidden-en-sgns.json.zip 37 | lukechan1231,,,,,,,,,,,2022-02-01 12:06:07.682648+00:00,en.char.test.defmod.json.zip 38 | lukechan1231,,,,,,,,,,,2022-02-01 12:06:12.716454+00:00,en.char.test.defmod.json.zip 39 | lukechan1231,,,,,,,,,,,2022-02-01 12:06:15.022199+00:00,en.char.test.defmod.json.zip 40 | emukans,,,,,,,,,,GRU 4 layers 3072 30 length SGNS,2022-02-01 12:06:16.911037+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden-en-sgns.json.zip 41 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS,2022-02-01 12:06:18.542822+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en-sgns.json.zip 42 | pzchen,,,,,,,,,,,2022-02-01 12:06:21.095892+00:00,ae-skip-share-out-sgns.zip 43 | pzchen,12.0,15.0,17.0,10.0,11.0,16.0,11.0,12.0,18.0,,2022-02-01 
12:07:33.623337+00:00,baseline-out-electra.zip 44 | pzchen,,,,,,,,,,,2022-02-01 12:07:36.254210+00:00,baseline-out-char.zip 45 | pzchen,6.0,9.0,15.0,5.0,5.0,10.0,6.0,6.0,14.0,,2022-02-01 12:07:38.601592+00:00,ae-skip-share-out-electra.zip 46 | pzchen,2.0,3.0,13.0,3.0,3.0,14.0,4.0,3.0,11.0,revdict-ensemble2-$EMBED-$LANG,2022-02-01 12:07:40.475737+00:00,ensemble2.zip 47 | tthhanh,,,,,,,,,,,2022-02-01 12:07:43.919897+00:00,en_revdict_lstm.json.zip 48 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:46.434273+00:00,en.test.revdict.zip 49 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:49.300441+00:00,it.test.revdict.zip 50 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:51.717222+00:00,ru.test.revdict.zip 51 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:53.984159+00:00,fr.test.revdict.zip 52 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:58.188691+00:00,es.test.revdict.zip 53 | emukans,,,,,,,,,,Concat,2022-02-01 12:08:00.529661+00:00,defmod-concat.json.zip 54 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:03.084179+00:00,fr.test.revdict.zip 55 | pzchen,,,,,,,,,,,2022-02-01 12:08:05.796646+00:00,ae-skip-share-out-char.zip 56 | dkorenci,,,,,,,,,,,2022-02-01 12:08:08.242852+00:00,defmod-submitV1-sgns.zip 57 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:12.003987+00:00,fr.test.revdict.zip 58 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:14.268831+00:00,it.test.revdict.zip 59 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:16.292523+00:00,nouvelles_soumissions.zip 60 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:18.543585+00:00,char_vocab.zip 61 | dkorenci,,,,,,,,,,,2022-02-01 12:08:22.584864+00:00,defmod-submitV1-char.zip 62 | dkorenci,,,,,,,,,,,2022-02-01 12:08:24.686983+00:00,defmod-submitV1-electra.zip 63 | emukans,,,,,,,,,,GRU 2 layers 1024 30 length SGNS S,2022-02-01 12:08:26.769185+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_s-sgns.json.zip 64 | emukans,,,,,,,,,,GRU 2 layers 1024 30 length SGNS LP,2022-02-01 12:19:41.107170+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_lp-sgns.json.zip 65 | emukans,,,,,,,,,,GRU 2 layers 1024 30 length SGNS LPH,2022-02-01 12:19:44.238472+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_lph-sgns.json.zip 66 | tthhanh,,,,,,,,,,,2022-02-01 12:19:47.815149+00:00,en_revdict_preds.bilstm.zip 67 | tthhanh,,,,,,,,,,,2022-02-01 12:19:50.341452+00:00,es_revdict_preds.bilstm.zip 68 | tthhanh,,,,,,,,,,,2022-02-01 12:19:52.441544+00:00,fr_revdict_preds.bilstm.zip 69 | tthhanh,,,,,,,,,,,2022-02-01 12:19:55.038641+00:00,it_revdict_preds.bilstm.zip 70 | tthhanh,,,,,,,,,,,2022-02-01 12:19:57.274701+00:00,ru_revdict_preds.bilstm.zip 71 | tthhanh,,,,,,,,,,,2022-02-01 12:19:59.946250+00:00,Archive.zip 72 | guntis,,,,,,,,,,ulbroka 4 layers 3072 epoch:005,2022-02-01 12:20:03.739221+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_no_early_epoch_005-en-sgns.zip 73 | guntis,,,,,,,,,,ulbroka 4 layers 3072 epoch:193,2022-02-01 12:20:06.186445+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_no_early_save04-en-sgns.zip 74 | guntis,,,,,,,,,,ulbroka 4 3072 electra epoch:55,2022-02-01 12:20:08.343455+00:00,defmod-gru_ulbroka_4_layers_3072_epoch_055-en-electra.zip 75 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:20:10.800772+00:00,nv_soumissions.zip 76 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:20:13.580339+00:00,nouv_soumissions.zip 77 | guntis,,,,,,,,,,ulbroka 4 3072 char epoch:56,2022-02-01 12:20:16.273107+00:00,defmod-gru_ulbroka_4_layers_3072_epoch_056-en-char.zip 78 | aardoiz,,,,,,,,,,,2022-02-01 
12:20:19.295783+00:00,test_preds.zip 79 | guntis,,,,,,,,,,ulbroka 4 3072 electra epoch:005,2022-02-01 12:20:21.912198+00:00,defmod-gru_ulbroka_4_layers_3072-en-electra-005.zip 80 | pzchen,4.0,2.0,12.0,2.0,2.0,13.0,3.0,2.0,10.0,,2022-02-01 12:20:24.239699+00:00,ensemble1.zip 81 | pzchen,,,,,,,,,,,2022-02-01 12:20:26.428237+00:00,baseline-out-sgns.zip 82 | Nihed_Bendahman_,15.0,24.0,3.0,16.0,21.0,3.0,15.0,20.0,8.0,,2022-02-01 12:20:30.617090+00:00,soumissions_27-01-2022.zip 83 | chlrbgus321,,,,,,,,,,,2022-02-01 12:20:33.006741+00:00,revdict.zip 84 | emukans,,,,,,,,,,GRU 2 layers 500 tokenizer 1024 30 length SGNS LPH,2022-02-01 12:20:36.193554+00:00,defmod-gru_500_tokenizer_2_layers_1024_hidden_30_len-en_lp-sgns.json.zip 85 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS LPH 5 ep,2022-02-01 12:20:38.338631+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en_lp-sgns.json.zip 86 | pzchen,,,,,,,,,,ngrams,2022-02-01 12:20:40.511309+00:00,ngram.zip 87 | tthhanh,,,,,,,,,,,2022-02-01 12:49:02.633573+00:00,en_combined.json.zip 88 | tthhanh,,,,,,,,,,,2022-02-01 12:49:05.449378+00:00,Archive.zip 89 | Nihed_Bendahman_,7.0,12.0,23.0,6.0,7.0,22.0,9.0,8.0,21.0,,2022-02-01 12:49:07.558292+00:00,soumissions_28-01-2022.zip 90 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS LPH 15 ep,2022-02-01 12:49:10.444052+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en_lp-sgns.json 2.zip 91 | emukans,,,,,,,,,,GRU 4 layers 3072 30 length CHAR LPH 15 ep,2022-02-01 12:49:12.536420+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_30_len-en_lp-char.json.zip 92 | emukans,,,,,,,,,,GRU 4 layers 3072 30 length SGNS LPH 15 ep RU,2022-02-01 12:49:15.463661+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_30_len-ru_lp-sgns.json.zip 93 | emukans,,,,,,,,,,GRU 4 layers 3072 30 length ELECTRA LPH 15 ep,2022-02-01 12:49:19.448679+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_30_len-en_lp-electra.json.zip 94 | the0ne,,,,,,,,,,test submission: revdict sgns,2022-02-01 12:49:22.274702+00:00,revdict_sgns_results.zip 95 | Nihed_Bendahman_,15.0,24.0,3.0,16.0,21.0,3.0,15.0,20.0,8.0,,2022-02-01 12:49:24.564797+00:00,S1_test_revdict.zip 96 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:49:26.862551+00:00,S2_test_revdict.zip 97 | Nihed_Bendahman_,9.0,11.0,23.0,8.0,9.0,22.0,8.0,10.0,21.0,,2022-02-01 12:49:29.192438+00:00,S3_test_revdict.zip 98 | the0ne,,,,,,,,,,test submission: revdict char,2022-02-01 12:49:31.648469+00:00,revdict_char_results.zip 99 | the0ne,25.0,5.0,23.0,,,,,,,test submission: revdict electra,2022-02-01 12:49:33.989521+00:00,revdict_electra_results.zip 100 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:40.765121+00:00,en.sgns.test.defmod.json.zip 101 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:43.015657+00:00,es.char.test.defmod.json.zip 102 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:45.130238+00:00,fr.sgns.test.defmod.json.zip 103 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:47.544503+00:00,it.sgns.test.defmod.json.zip 104 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:49.519259+00:00,ru.sgns.test.defmod.json.zip 105 | zhwa3087,,,,,,,,,,"2nd 106 | training: three languages. 
107 | tricks: workpiece tokenizer trained on these three languages, and added language token when tokenizating the gloss.",2022-02-01 12:49:51.497139+00:00,predict1.json.zip 108 | aardoiz,,,,,,,,,,,2022-02-01 12:49:54.167640+00:00,test_preds_v2.zip 109 | tthhanh,,,,,,,,,,,2022-02-01 12:49:56.804493+00:00,Archive_en_cross.zip 110 | tthhanh,,,,,,,,,,,2022-02-01 12:49:58.780544+00:00,Archive_es_cross.zip 111 | tthhanh,,,,,,,,,,,2022-02-01 12:50:01.331861+00:00,Archive_fr_cross.zip 112 | the0ne,,,,,,,,,,multitask finetuning: sgns,2022-02-01 12:50:04.181990+00:00,revdict_sgns_results.zip 113 | tthhanh,,,,,,,,,,,2022-02-01 12:58:49.927108+00:00,Archive_fr_cross.zip 114 | the0ne,,,,,,,,,,multitask finetuning: char,2022-02-01 12:58:52.282929+00:00,revdict_char_results.zip 115 | the0ne,22.0,7.0,23.0,,,,,,,multitask finetuning: electra,2022-02-01 12:58:54.422590+00:00,revdict_electra_results.zip 116 | tthhanh,,,,,,,,,,,2022-02-01 12:58:56.632536+00:00,Archive_it_cross.zip 117 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS LPH 5 ep RU,2022-02-01 12:58:58.792452+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-ru_lp-sgns.json.zip 118 | tthhanh,,,,,,,,,,,2022-02-01 12:59:00.750771+00:00,Archive_ru_cross.zip 119 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length ELECTRA LPH 7 ep RU,2022-02-01 12:59:02.750990+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-ru_lp-electra.json.zip 120 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length ELECTRA LPH 7 ep EN,2022-02-01 12:59:04.934403+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en_lp-electra.json.zip 121 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length CHAR LPH 7 ep RU,2022-02-01 12:59:07.087390+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-ru_lp-char.json.zip 122 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length ELECTRA LPH 7 ep FR,2022-02-01 12:59:10.298601+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-fr_lp-electra.json.zip 123 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS LPH 6 ep IT,2022-02-01 12:59:12.655912+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-it_lp-sgns.json.zip 124 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length CHAR LPH 6 ep EN,2022-02-01 12:59:14.921223+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en_lp-char.json.zip 125 | Locchi,1.0,13.0,10.0,,,,,,,,2022-02-01 12:59:17.545753+00:00,en.test.revdict.predicted.zip 126 | Locchi,,,,,,,,,,,2022-02-01 12:59:21.423987+00:00,it.test.revdict.predicted.zip 127 | dkorenci,,,,,,,,,,,2022-02-01 12:59:24.548433+00:00,defmod-submitV2-allvec-8K.zip 128 | dkorenci,,,,,,,,,,,2022-02-01 12:59:26.793534+00:00,defmod-submitV2-sgns-8K.zip 129 | dkorenci,,,,,,,,,,,2022-02-01 12:59:29.383576+00:00,defmod-submitV2-allvec-5K.zip 130 | dkorenci,,,,,,,,,,,2022-02-01 12:59:31.531125+00:00,defmod-submitV2-sgns-5K.zip 131 | emukans,,,,,,,,,,,2022-02-01 12:59:33.615316+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-es_lp-sgns.json.zip 132 | WENGSYX,13.0,4.0,9.0,11.0,6.0,11.0,1.0,4.0,12.0,,2022-02-01 12:59:36.019237+00:00,en.test.defmod.zip 133 | Locchi,,,,,,,,,,,2022-02-01 12:59:40.379830+00:00,en.test.defmod.predicted.zip 134 | Locchi,,,,,,,,,,,2022-02-01 12:59:43.120882+00:00,en.test.defmod.predicted_v2.zip 135 | Locchi,,,,,,,,,,,2022-02-01 12:59:45.666414+00:00,it.test.defmod.predicted.zip 136 | Locchi,,,,,,,,,,,2022-02-01 12:59:48.464254+00:00,es.test.defmod.predicted.zip 137 | dkorenci,18.0,20.0,6.0,12.0,14.0,5.0,12.0,7.0,2.0,,2022-02-01 13:05:59.964077+00:00,revdict-v8k-b1024-avg.zip 138 
| dkorenci,16.0,18.0,8.0,13.0,16.0,8.0,13.0,14.0,5.0,,2022-02-01 13:06:02.298756+00:00,revdict-v8k-b2048-avg.zip 139 | dkorenci,17.0,19.0,4.0,17.0,17.0,6.0,16.0,13.0,3.0,,2022-02-01 13:06:04.740677+00:00,revdict-v8k-b4096-avg.zip 140 | dkorenci,19.0,21.0,7.0,14.0,15.0,7.0,17.0,15.0,6.0,,2022-02-01 13:06:07.035962+00:00,revdict-v8k-b8192-avg.zip 141 | dkorenci,20.0,22.0,5.0,18.0,18.0,4.0,19.0,21.0,4.0,,2022-02-01 13:06:09.260075+00:00,revdict-v8k-b2048-avg-plat-e150.zip 142 | guntis,,,,,,,,,,electra 093,2022-02-01 13:06:11.475542+00:00,defmod-gru_ulbroka_4_layers_3072-en-electra-093.zip 143 | guntis,,,,,,,,,,sgns 315,2022-02-01 13:06:14.002128+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_no_early-en-sgns-315.zip 144 | dkorenci,21.0,25.0,1.0,19.0,19.0,1.0,18.0,16.0,1.0,,2022-02-01 13:06:16.518200+00:00,revdict-v8k-b2048-mt7-avg-plat.zip 145 | guntis,,,,,,,,,,electra 085,2022-02-01 13:06:20.442528+00:00,defmod-gru_ulbroka_4_layers_3072-en-electra-085.zip 146 | WENGSYX,24.0,17.0,25.0,21.0,13.0,18.0,21.0,18.0,16.0,,2022-02-01 13:06:23.136127+00:00,results.zip 147 | the0ne,10.0,6.0,23.0,,,,,,,revdict contrastive,2022-02-01 13:06:25.470976+00:00,revdict_contrastive.zip 148 | dkorenci,,,,,,,,,,,2022-02-01 13:06:27.560421+00:00,defmod-submitV2-CF-allvec-sgns-8K.zip 149 | dkorenci,,,,,,,,,,,2022-02-01 13:06:29.686010+00:00,defmod-submitV2-CF-allvec-sgns-5K.zip 150 | talent404,,,,,,,,,,english defmod,2022-02-01 13:06:34.144091+00:00,test.json.zip 151 | zhwa3087,,,,,,,,,,"3rd 152 | training: five languages 153 | tricks: Unigram tokenizer trained on these five languages, language tokens, and dynamic weights averaging losses for sgns and char.",2022-02-01 13:06:36.534003+00:00,3rd.zip 154 | zhwa3087,,,,,,,,,,"final 155 | training: five languages 156 | tricks: Unigram tokenizer trained on these five languages, language tokens, and dynamic weights averaging losses for sgns and char.",2022-02-01 13:06:38.780846+00:00,final.zip 157 | guntis,,,,,,,,,,electra 101,2022-02-01 13:06:43.743485+00:00,defmod-gru_ulbroka_4_layers_3072-en-electra-101.zip 158 | dkorenci,,,,,,,,,,,2022-02-01 13:06:46.158191+00:00,defmod-submit-V3-allvec.zip 159 | dkorenci,,,,,,,,,,,2022-02-01 13:06:48.604489+00:00,defmod-submit-V3-sgns.zip 160 | dkorenci,,,,,,,,,,,2022-02-01 13:06:50.621731+00:00,defmod-submit-V4-allvec-gru.zip 161 | dkorenci,,,,,,,,,,,2022-02-01 13:06:56.750144+00:00,defmod-submit-V4-allvec-lstm.zip 162 | WENGSYX,24.0,17.0,25.0,21.0,13.0,18.0,21.0,18.0,16.0,,2022-02-01 21:42:34.383443+00:00,results.zip 163 | dkorenci,,,,,,,,,,,2022-02-01 10:31:20.217420+00:00,defmod-submitV3-CF-allvec-sgns.zip 164 | dkorenci,,,,,,,,,,,2022-02-01 11:40:22.727710+00:00,defmod-submitV4-CF-allvec-lstm-gru.zip 165 | cunliang.kong,,,,,,,,,,,2022-02-01 11:55:05.593202+00:00,results.zip 166 | cunliang.kong,,,,,,,,,,,2022-02-01 11:55:05.602946+00:00,results.zip 167 | --------------------------------------------------------------------------------