├── codalab
│   ├── scoring_program
│   ├── competition
│   │   ├── codwoe-logo.png
│   │   ├── data.html
│   │   ├── terms_and_conditions.html
│   │   ├── overview.html
│   │   ├── evaluation.html
│   │   └── competition.yaml
│   ├── README.md
│   └── Makefile
├── code
│   ├── metadata
│   ├── codwoe_entrypoint.py
│   ├── check_output.py
│   ├── models.py
│   ├── data.py
│   ├── revdict.py
│   ├── defmod.py
│   └── score.py
├── .gitignore
├── baseline_archs
│   ├── code
│   │   ├── metadata
│   │   ├── codwoe_entrypoint.py
│   │   ├── check_output.py
│   │   ├── score.py
│   │   ├── data.py
│   │   ├── revdict.py
│   │   ├── models.py
│   │   └── defmod.py
│   └── README.md
├── rankings
│   ├── final_rankings
│   │   ├── revdict-electra_rankings-per-users.csv
│   │   ├── defmod_rankings-per-users.csv
│   │   ├── revdict-sgns_rankings-per-users.csv
│   │   └── revdict-char_rankings-per-users.csv
│   ├── make_rankings_defmod.py
│   ├── make_rankings_char.py
│   ├── make_rankings_electra.py
│   ├── make_rankings_sgns.py
│   ├── README.md
│   └── submission_ranks
│       └── results_revdict-electra-rankings.csv
├── requirements.txt
├── docker
│   └── Dockerfile
├── data
│   └── README.md
└── README.md
/codalab/scoring_program:
--------------------------------------------------------------------------------
1 | ../code/
--------------------------------------------------------------------------------
/codalab/competition/codwoe-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimotheeMickus/codwoe/HEAD/codalab/competition/codwoe-logo.png
--------------------------------------------------------------------------------
/code/metadata:
--------------------------------------------------------------------------------
1 | command: python3 $program/codwoe_entrypoint.py score $input/res/ --reference_files_dir $input/ref/ --output_file $output
2 | description: run scoring program on any data
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .venv/
3 | .tb/
4 | data/*
5 | *.json
6 | *.pt
7 | *.zip
8 | scores.txt
9 | !data/README.md
10 | !data/trial-data_all.zip
11 | !data/train-data_all.zip
12 |
--------------------------------------------------------------------------------
/baseline_archs/code/metadata:
--------------------------------------------------------------------------------
1 | command: python3 $program/codwoe_entrypoint.py score $input/res/ --reference_files_dir $input/ref/ --output_file $output
2 | description: run scoring program on any data
3 |
--------------------------------------------------------------------------------
/codalab/README.md:
--------------------------------------------------------------------------------
1 | # CodaLab website configuration
2 |
3 | Adapted from the Sample CodaLab competition for SemEval.
4 |
5 | The scoring program is a symlink from the code directory in the root of this
6 | repository.
7 |
8 | The reference does not indicate a field in the YAML.
9 |
--------------------------------------------------------------------------------
/codalab/Makefile:
--------------------------------------------------------------------------------
1 | competition/scoring_program.zip: scoring_program/*
2 | cd scoring_program && zip ./scoring_program.zip metadata *.py && cd .. && mv scoring_program/scoring_program.zip ./competition/
3 |
4 | competition.zip: competition/* competition/scoring_program.zip
5 | cd competition && zip ../competition.zip * && cd ..
6 |
7 | submission.zip: submission/*
8 | cd submission && zip ../submission.zip * && cd ..
9 |
--------------------------------------------------------------------------------
/rankings/final_rankings/revdict-electra_rankings-per-users.csv:
--------------------------------------------------------------------------------
1 | user,EN MSE,EN cos,EN rank,FR MSE,FR cos,FR rank,RU MSE,RU cos,RU rank,Avg EN,Avg FR,Avg RU,Rank EN,Rank FR,Rank RU
2 | Locchi,1.0,13.0,10.0,,,,,,,8.0,,,3,,
3 | Nihed_Bendahman_,7.0,11.0,3.0,6.0,7.0,3.0,8.0,8.0,8.0,7.0,5.333333333333333,8.0,2,2,4
4 | WENGSYX,13.0,4.0,9.0,11.0,6.0,11.0,1.0,4.0,12.0,8.666666666666666,9.333333333333334,5.666666666666667,4,4,2
5 | aardoiz,,,,,,,,,,,,,,,
6 | chlrbgus321,,,,,,,,,,,,,,,
7 | cunliang.kong,,,,,,,,,,,,,,,
8 | dkorenci,16.0,18.0,1.0,12.0,14.0,1.0,12.0,7.0,1.0,11.666666666666666,9.0,6.666666666666667,5,3,3
9 | emukans,,,,,,,,,,,,,,,
10 | guntis,,,,,,,,,,,,,,,
11 | lukechan1231,,,,,,,,,,,,,,,
12 | pzchen,2.0,2.0,12.0,2.0,2.0,10.0,3.0,2.0,10.0,5.333333333333333,4.666666666666667,5.0,1,1,1
13 | talent404,,,,,,,,,,,,,,,
14 | the0ne,10.0,5.0,23.0,,,,,,,12.666666666666666,,,6,,
15 | tthhanh,,,,,,,,,,,,,,,
16 | zhwa3087,,,,,,,,,,,,,,,
17 |
--------------------------------------------------------------------------------
/rankings/make_rankings_defmod.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | METRICS = ['MvSc.', 'S-BLEU', 'L-BLEU']
3 | LANGS = ['EN', 'ES', 'FR', 'IT', 'RU']
4 | df = pd.read_csv('res_defmod.csv')
5 | def get_sorted_vals(colname):
6 | return sorted(df[colname].dropna(), reverse=True)
7 | for colname in [f"{lang} {metric}" for lang in LANGS for metric in METRICS]:
8 | sorted_vals = get_sorted_vals(colname)
9 | def float_to_rank(cell):
10 | if pd.isna(cell): return cell
11 | return sum(i >= cell for i in sorted_vals)
12 | df[colname] = df[colname].apply(float_to_rank)
13 | df.to_csv('results_defmod.csv', index=False)
14 | df_ranks = df.groupby('user').min()
15 | for lang in LANGS:
16 | def get_mean_rank(row):
17 | metrics = [row[f"{lang} {metric}"] for metric in METRICS]
18 | if any(map(pd.isna, metrics)): return pd.NA
19 | return sum(metrics) / len(metrics)
20 | df_ranks[f"Rank {lang}"] = df_ranks.apply(get_mean_rank, axis=1)
21 | del df_ranks['Date']
22 | del df_ranks['filename']
23 | df_ranks.to_csv('defmod_rankings-per-users.csv')
24 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.12.0
2 | cachetools==4.2.2
3 | certifi==2020.12.5
4 | chardet==4.0.0
5 | click==8.0.1
6 | cycler==0.10.0
7 | filelock==3.0.12
8 | google-auth==1.30.0
9 | google-auth-oauthlib==0.4.4
10 | grpcio==1.37.1
11 | huggingface-hub==0.0.12
12 | idna==2.10
13 | joblib==1.0.1
14 | kiwisolver==1.3.1
15 | Markdown==3.3.4
16 | matplotlib==3.4.2
17 | moverscore==1.0.3
18 | nltk==3.6.7
19 | numpy>=1.20.3
20 | oauthlib==3.1.0
21 | packaging==21.0
22 | Pillow==8.3.0
23 | portalocker==2.3.0
24 | protobuf==3.17.0
25 | pyasn1==0.4.8
26 | pyasn1-modules==0.2.8
27 | pyemd
28 | pyparsing==2.4.7
29 | python-dateutil==2.8.1
30 | PyYAML==5.4.1
31 | regex==2022.1.18
32 | requests==2.25.1
33 | requests-oauthlib==1.3.0
34 | rsa==4.7.2
35 | sacremoses==0.0.45
36 | sentencepiece==0.1.96
37 | six==1.16.0
38 | tensorboard==2.5.0
39 | tensorboard-data-server==0.6.1
40 | tensorboard-plugin-wit==1.8.0
41 | tokenizers==0.8.1rc2
42 | torch==1.8.1
43 | tqdm==4.60.0
44 | transformers==3.1.0
45 | typing==3.7.4.3
46 | typing-extensions==3.10.0.0
47 | urllib3==1.26.4
48 | Werkzeug==2.0.1
49 |
--------------------------------------------------------------------------------
/rankings/make_rankings_char.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | METRICS = {
4 | 'MSE':False,
5 | 'cos':True,
6 | 'rank':False,
7 | }
8 | LANGS = ['EN', 'ES', 'FR', 'IT', 'RU']
9 |
10 | df = pd.read_csv('submission_scores/res_revdict-char.csv')
11 | def get_sorted_vals(colname):
12 | return sorted(df[colname].dropna(), reverse=False)
13 | for lang, metric in [(lang, metric) for lang in LANGS for metric in METRICS]:
14 | colname = f"{lang} {metric}"
15 | sorted_vals = get_sorted_vals(colname)
16 | to_maximize = METRICS[metric]
17 | def float_to_rank(cell):
18 | if pd.isna(cell):
19 | return cell
20 | if to_maximize:
21 | return sum(i >= cell for i in sorted_vals)
22 | return sum(i <= cell for i in sorted_vals)
23 |
24 | df[colname] = df[colname].apply(float_to_rank)
25 | df.to_csv('submission_ranks/results_revdict-char-rankings.csv', index=False)
26 | df_ranks = df.groupby('user').min()
27 | for lang in LANGS:
28 | def get_mean_rank(row):
29 | metrics = [row[f"{lang} {metric}"] for metric in METRICS]
30 | if any(map(pd.isna, metrics)): return pd.NA
31 | return sum(metrics) / len(metrics)
32 | df_ranks[f"Rank {lang}"] = df_ranks.apply(get_mean_rank, axis=1)
33 | del df_ranks['Date']
34 | del df_ranks['filename']
35 | df_ranks.to_csv('final_rankings/revdict-char_rankings-per-users.csv')
36 |
--------------------------------------------------------------------------------
/rankings/make_rankings_electra.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | METRICS = {
4 | 'MSE':False,
5 | 'cos':True,
6 | 'rank':False,
7 | }
8 | LANGS = ['EN', 'FR', 'RU']
9 |
10 | df = pd.read_csv('submission_scores/res_revdict-electra.csv')
11 | def get_sorted_vals(colname):
12 | return sorted(df[colname].dropna(), reverse=False)
13 | for lang, metric in [(lang, metric) for lang in LANGS for metric in METRICS]:
14 | colname = f"{lang} {metric}"
15 | sorted_vals = get_sorted_vals(colname)
16 | to_maximize = METRICS[metric]
17 | def float_to_rank(cell):
18 | if pd.isna(cell):
19 | return cell
20 | if to_maximize:
21 | return sum(i >= cell for i in sorted_vals)
22 | return sum(i <= cell for i in sorted_vals)
23 |
24 | df[colname] = df[colname].apply(float_to_rank)
25 | df.to_csv('submission_ranks/results_revdict-electra-rankings.csv', index=False)
26 | df_ranks = df.groupby('user').min()
27 | for lang in LANGS:
28 | def get_mean_rank(row):
29 | metrics = [row[f"{lang} {metric}"] for metric in METRICS]
30 | if any(map(pd.isna, metrics)): return pd.NA
31 | return sum(metrics) / len(metrics)
32 | df_ranks[f"Rank {lang}"] = df_ranks.apply(get_mean_rank, axis=1)
33 | del df_ranks['Date']
34 | del df_ranks['filename']
35 | df_ranks.to_csv('final_rankings/revdict-electra_rankings-per-users.csv')
36 |
--------------------------------------------------------------------------------
/rankings/make_rankings_sgns.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | METRICS = {
4 | 'MSE':False,
5 | 'cos':True,
6 | 'rank':False,
7 | }
8 | LANGS = ['EN', 'ES', 'FR', 'IT', 'RU']
9 |
10 | df = pd.read_csv('submission_scores/res_revdict-sgns.csv')
11 | def get_sorted_vals(colname):
12 | return sorted(df[colname].dropna(), reverse=False)
13 | for lang, metric in [(lang, metric) for lang in LANGS for metric in METRICS]:
14 | colname = f"{lang} {metric}"
15 | sorted_vals = get_sorted_vals(colname)
16 | to_maximize = METRICS[metric]
17 | def float_to_rank(cell):
18 | if pd.isna(cell):
19 | return cell
20 | if to_maximize:
21 | return sum(i >= cell for i in sorted_vals)
22 | return sum(i <= cell for i in sorted_vals)
23 |
24 | df[colname] = df[colname].apply(float_to_rank)
25 | df.to_csv('submission_ranks/results_revdict-sgns-rankings.csv', index=False)
26 | df_ranks = df.groupby('user').min()
27 | for lang in LANGS:
28 | def get_mean_rank(row):
29 | metrics = [row[f"{lang} {metric}"] for metric in METRICS]
30 | if any(map(pd.isna, metrics)): return pd.NA
31 | return sum(metrics) / len(metrics)
32 | df_ranks[f"Rank {lang}"] = df_ranks.apply(get_mean_rank, axis=1)
33 | del df_ranks['Date']
34 | del df_ranks['filename']
35 | df_ranks.to_csv('final_rankings/revdict-sgns_rankings-per-users.csv')
36 |
--------------------------------------------------------------------------------
/code/codwoe_entrypoint.py:
--------------------------------------------------------------------------------
1 | import defmod, revdict, check_output, score
2 |
3 | if __name__ == "__main__":
4 | import argparse
5 |
6 | parser = argparse.ArgumentParser(description="demo script for participants")
7 | subparsers = parser.add_subparsers(dest="command", required=True)
8 | parser_defmod = defmod.get_parser(
9 | parser=subparsers.add_parser(
10 | "defmod", help="run a definition modeling baseline"
11 | )
12 | )
13 | parser_revdict = revdict.get_parser(
14 | parser=subparsers.add_parser(
15 | "revdict", help="run a reverse dictionary baseline"
16 | )
17 | )
18 | parser_check_output = check_output.get_parser(
19 | parser=subparsers.add_parser(
20 | "check-format", help="check the format of a submission file"
21 | )
22 | )
23 | parser_score = score.get_parser(
24 | parser=subparsers.add_parser("score", help="evaluate a submission")
25 | )
26 | args = parser.parse_args()
27 | if args.command == "defmod":
28 | defmod.main(args)
29 | elif args.command == "revdict":
30 | revdict.main(args)
31 | elif args.command == "check-format":
32 | check_output.main(args.submission_file)
33 | elif args.command == "score":
34 | score.main(args)
35 |
--------------------------------------------------------------------------------
/baseline_archs/code/codwoe_entrypoint.py:
--------------------------------------------------------------------------------
1 | import defmod, revdict, check_output, score
2 |
3 | if __name__ == "__main__":
4 | import argparse
5 |
6 | parser = argparse.ArgumentParser(description="demo script for participants")
7 | subparsers = parser.add_subparsers(dest="command", required=True)
8 | parser_defmod = defmod.get_parser(
9 | parser=subparsers.add_parser(
10 | "defmod", help="run a definition modeling baseline"
11 | )
12 | )
13 | parser_revdict = revdict.get_parser(
14 | parser=subparsers.add_parser(
15 | "revdict", help="run a reverse dictionary baseline"
16 | )
17 | )
18 | parser_check_output = check_output.get_parser(
19 | parser=subparsers.add_parser(
20 | "check-format", help="check the format of a submission file"
21 | )
22 | )
23 | parser_score = score.get_parser(
24 | parser=subparsers.add_parser("score", help="evaluate a submission")
25 | )
26 | args = parser.parse_args()
27 | if args.command == "defmod":
28 | defmod.main(args)
29 | elif args.command == "revdict":
30 | revdict.main(args)
31 | elif args.command == "check-format":
32 | check_output.main(args.submission_file)
33 | elif args.command == "score":
34 | score.main(args)
35 |
--------------------------------------------------------------------------------
/rankings/final_rankings/defmod_rankings-per-users.csv:
--------------------------------------------------------------------------------
1 | user,EN MvSc.,EN S-BLEU,EN L-BLEU,ES MvSc.,ES S-BLEU,ES L-BLEU,FR MvSc.,FR S-BLEU,FR L-BLEU,IT MvSc.,IT S-BLEU,IT L-BLEU,RU MvSc.,RU S-BLEU,RU L-BLEU,Avg EN,Avg ES,Avg FR,Avg IT,Avg RU,Rank EN,Rank ES,Rank FR,Rank IT,Rank RU
2 | Locchi,58.0,43.0,44.0,27.0,28.0,28.0,,,,15.0,28.0,28.0,,,,48.333333333333336,27.666666666666668,,23.666666666666668,,8,6,,7,
3 | Nihed_Bendahman_,,,,,,,,,,,,,,,,,,,,,,,,,
4 | WENGSYX,76.0,75.0,75.0,28.0,29.0,29.0,31.0,32.0,32.0,28.0,13.0,12.0,37.0,36.0,36.0,75.33333333333333,28.666666666666668,31.666666666666668,17.666666666666668,36.333333333333336,9,7,6,6,6
5 | aardoiz,,,,,,,,,,,,,,,,,,,,,,,,,
6 | chlrbgus321,,,,,,,,,,,,,,,,,,,,,,,,,
7 | cunliang.kong,2.0,9.0,9.0,2.0,13.0,14.0,12.0,6.0,4.0,2.0,2.0,2.0,2.0,4.0,8.0,6.666666666666667,9.666666666666666,7.333333333333333,2.0,4.666666666666667,3,2,3,1,2
8 | dkorenci,12.0,1.0,2.0,6.0,1.0,1.0,2.0,2.0,8.0,7.0,15.0,15.0,18.0,18.0,18.0,5.0,2.6666666666666665,4.0,12.333333333333334,18.0,2,1,1,5,5
9 | emukans,15.0,25.0,27.0,3.0,21.0,21.0,1.0,13.0,14.0,6.0,14.0,14.0,8.0,15.0,15.0,22.333333333333332,15.0,9.333333333333334,11.333333333333334,12.666666666666666,5,4,4,4,3
10 | guntis,26.0,29.0,28.0,,,,,,,,,,,,,27.666666666666668,,,,,6,,,,
11 | lukechan1231,34.0,50.0,53.0,21.0,23.0,23.0,4.0,21.0,21.0,5.0,7.0,7.0,5.0,17.0,17.0,45.666666666666664,22.333333333333332,15.333333333333334,6.333333333333333,13.0,7,5,5,3,4
12 | pzchen,5.0,12.0,12.0,5.0,18.0,18.0,18.0,1.0,2.0,4.0,4.0,4.0,4.0,2.0,2.0,9.666666666666666,13.666666666666666,7.0,4.0,2.6666666666666665,4,3,2,2,1
13 | talent404,3.0,3.0,1.0,,,,,,,,,,,,,2.3333333333333335,,,,,1,,,,
14 | the0ne,,,,,,,,,,,,,,,,,,,,,,,,,
15 | tthhanh,,,,,,,,,,,,,,,,,,,,,,,,,
16 | zhwa3087,,,,,,,,,,,,,,,,,,,,,,,,,
17 |
--------------------------------------------------------------------------------
/rankings/final_rankings/revdict-sgns_rankings-per-users.csv:
--------------------------------------------------------------------------------
1 | user,EN MSE,EN cos,EN rank,ES MSE,ES cos,ES rank,FR MSE,FR cos,FR rank,IT MSE,IT cos,IT rank,RU MSE,RU cos,RU rank,Unnamed: 19,Avg EN,Avg ES,Avg FR,Avg IT,Avg RU,Rank EN,Rank ES,Rank FR,Rank IT,Rank RU
2 | Locchi,8.0,14.0,17.0,,,,,,,8.0,13.0,16.0,,,,False,13.0,,,12.333333333333334,,5,,,4,
3 | Nihed_Bendahman_,14.0,26.0,8.0,15.0,19.0,10.0,14.0,20.0,13.0,18.0,16.0,14.0,10.0,15.0,12.0,False,16.0,14.666666666666666,15.666666666666666,16.0,12.333333333333334,6,5,4,6,4
4 | WENGSYX,2.0,6.0,12.0,1.0,6.0,8.0,4.0,5.0,7.0,4.0,10.0,12.0,1.0,1.0,7.0,False,6.666666666666667,5.0,5.333333333333333,8.666666666666666,3.0,2,2,2,3,1
5 | aardoiz,,,,16.0,1.0,1.0,,,,,,,,,,False,,6.0,,,,,3,,,
6 | chlrbgus321,1.0,4.0,9.0,,,,,,,,,,,,,True,4.666666666666667,,,,,1,,,,
7 | cunliang.kong,,,,,,,,,,,,,,,,False,,,,,,,,,,
8 | dkorenci,28.0,1.0,1.0,7.0,2.0,2.0,11.0,1.0,1.0,7.0,1.0,1.0,11.0,2.0,1.0,False,10.0,3.6666666666666665,4.333333333333333,3.0,4.666666666666667,4,1,1,1,2
9 | emukans,,,,,,,,,,,,,,,,False,,,,,,,,,,
10 | guntis,,,,,,,,,,,,,,,,False,,,,,,,,,,
11 | lukechan1231,,,,,,,,,,,,,,,,False,,,,,,,,,,
12 | pzchen,4.0,8.0,11.0,3.0,10.0,12.0,1.0,8.0,9.0,1.0,2.0,8.0,2.0,7.0,8.0,False,7.666666666666667,8.333333333333334,6.0,3.6666666666666665,5.666666666666667,3,4,3,2,3
13 | talent404,,,,,,,,,,,,,,,,True,,,,,,,,,,
14 | the0ne,16.0,22.0,30.0,,,,,,,,,,,,,False,22.666666666666668,,,,,8,,,,
15 | tthhanh,19.0,27.0,24.0,17.0,26.0,22.0,17.0,18.0,22.0,21.0,25.0,22.0,21.0,25.0,22.0,False,23.333333333333332,21.666666666666668,19.0,22.666666666666668,22.666666666666668,9,7,6,7,6
16 | zhwa3087,22.0,15.0,13.0,11.0,17.0,19.0,12.0,17.0,21.0,9.0,14.0,15.0,15.0,11.0,13.0,False,16.666666666666668,15.666666666666666,16.666666666666668,12.666666666666666,13.0,7,6,5,5,5
17 |
--------------------------------------------------------------------------------
/rankings/final_rankings/revdict-char_rankings-per-users.csv:
--------------------------------------------------------------------------------
1 | user,EN MSE,EN cos,EN rank,ES MSE,ES cos,ES rank,FR MSE,FR cos,FR rank,IT MSE,IT cos,IT rank,RU MSE,RU cos,RU rank,Avg EN,Avg ES,Avg FR,Avg IT,Avg RU,Rank EN,Rank ES,Rank FR,Rank IT,Rank RU
2 | Locchi,1.0,1.0,11.0,,,,,,,6.0,7.0,17.0,,,,4.333333333333333,,,10.0,,1,,,4,
3 | Nihed_Bendahman_,7.0,9.0,8.0,7.0,7.0,8.0,9.0,9.0,6.0,8.0,9.0,10.0,7.0,8.0,14.0,8.0,7.333333333333333,8.0,9.0,9.666666666666666,2,2,2,3,4
4 | WENGSYX,24.0,21.0,12.0,20.0,9.0,23.0,18.0,17.0,27.0,21.0,21.0,18.0,22.0,19.0,15.0,19.0,17.333333333333332,20.666666666666668,20.0,18.666666666666668,7,5,5,6,5
5 | aardoiz,,,,,,,,,,,,,,,,,,,,,,,,,
6 | chlrbgus321,,,,,,,,,,,,,,,,,,,,,,,,,
7 | cunliang.kong,,,,,,,,,,,,,,,,,,,,,,,,,
8 | dkorenci,17.0,22.0,1.0,10.0,16.0,1.0,12.0,15.0,2.0,12.0,10.0,1.0,8.0,11.0,1.0,13.333333333333334,9.0,9.666666666666666,7.666666666666667,6.666666666666667,4,3,4,2,2
9 | emukans,,,,,,,,,,,,,,,,,,,,,,,,,
10 | guntis,,,,,,,,,,,,,,,,,,,,,,,,,
11 | lukechan1231,,,,,,,,,,,,,,,,,,,,,,,,,
12 | pzchen,5.0,6.0,15.0,2.0,3.0,12.0,1.0,1.0,7.0,2.0,2.0,12.0,1.0,1.0,8.0,8.666666666666666,5.666666666666667,3.0,5.333333333333333,3.3333333333333335,3,1,1,1,1
13 | talent404,,,,,,,,,,,,,,,,,,,,,,,,,
14 | the0ne,10.0,3.0,28.0,,,,,,,,,,,,,13.666666666666666,,,,,5,,,,
15 | tthhanh,,,,,,,,,,,,,,,,,,,,,,,,,
16 | zhwa3087,19.0,19.0,10.0,17.0,15.0,6.0,13.0,12.0,1.0,9.0,6.0,16.0,15.0,6.0,6.0,16.0,12.666666666666666,8.666666666666666,10.333333333333334,9.0,6,4,3,5,3
17 |
--------------------------------------------------------------------------------
/codalab/competition/data.html:
--------------------------------------------------------------------------------
1 | Data links
2 |
3 |
4 | Data for the competition is available on
5 | our git
6 | repository. The complete datasets will be made available at the end of the
7 | evaluation phase (January 31st).
8 |
9 |
10 | Data format & contents
11 |
12 | All datasets are in JSON format. We minimize whitespace for memory consumption
13 | purposes. Trial, training and development datasets for both definition
14 | modeling and reverse dictionary tracks are shared: as the two tasks are
15 | converse tasks, the source for either one is the target of the other. Test
16 | datasets will be distinct, and have no overlap between the two tracks.
17 |
18 |
19 | Datasets are available for the five following languages: EN, ES, FR, IT, RU.
20 | Datasets are distinct per language. All five languages contain embeddings for
21 | two distinct types of architectures: "char" (character-based embeddings) and
22 | "sgns" (word2vec Skip-Gram with Negative Sampling). EN, FR and RU have in
23 | addition a third type of embedding available, namely "electra"
24 | (Transformer-based contextual embeddings).
25 |
26 |
27 | All embeddings for any given language were trained on the same corpus;
28 | embedding training corpora across languages were designed to be comparable.
29 | All computation details will be provided in the task description paper.
30 |
31 |
32 | The trial dataset is annotated much more richly than the other datasets.
33 | During the shared task, we will only provide gloss, id and embeddings. After
34 | the evaluation phase, we will provide complete datasets, which will also
35 | include the word being defined and its part of speech.
36 |
37 |
38 | We very strongly encourage participants to reserve the trial dataset for
39 | running manual evaluations of their systems' productions. The presence of a
40 | manual evaluation in system descriptions will be taken into account during the
41 | reviewing process and discussed in the task paper.
42 |
43 | License Information
44 |
45 | The complete datasets, embedding architectures and embedding models will be
46 | made publicly available after the evaluation phase under a CC-BY-SA license.
47 | Please link to this page and cite our upcoming task description paper if you
48 | use these datasets in your own work.
49 |
50 |
51 | Dictionary data has been extracted from dumps provided by
52 | Sérasset (2014).
53 |
54 |
--------------------------------------------------------------------------------
/baseline_archs/README.md:
--------------------------------------------------------------------------------
1 | # Baseline scores
2 |
3 | Here are baseline results on the development set for the two tracks, obtained with the architectures described in this sub-directory.
4 | The code here is itself based on the baselines we provided earlier, along with a couple of improvements:
5 | - a principled way of selecting hyperparameters (using Bayesian Optimization),
6 | - a sentence-piece retokenization, to ensure the vocabulary is of the same size for all languages,
7 | - a beam-search decoding for the definition modeling pipeline.
8 |
9 | ## Installation
10 | To train these models, you will also need the `scikit-learn` and `scikit-optimize` libraries, which we used to select hyperparameters.
11 | ```sh
12 | pip3 install scikit-learn==0.24.2 scikit-optimize==0.8.1
13 | ```
14 |
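As a point of reference, a hyperparameter search with `scikit-optimize` boils down to something like the sketch below; the objective and search space are illustrative toys, not the exact ones used for these baselines.

```python
from skopt import gp_minimize
from skopt.space import Integer, Real

# Toy stand-in for "train a model and return its validation loss";
# in practice, each call would run a full training run.
def objective(params):
    learning_rate, n_layers = params
    return (learning_rate - 1e-3) ** 2 + 0.01 * n_layers

search_space = [
    Real(1e-5, 1e-2, prior="log-uniform", name="learning_rate"),
    Integer(2, 6, name="n_layers"),
]

result = gp_minimize(objective, search_space, n_calls=20, random_state=0)
print(result.x, result.fun)  # best hyperparameters and the loss they achieve
```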
15 |
16 | ## Scores
17 | In the Reverse Dictionary table below, rows correspond to the different targets.
18 | In the Definition Modeling table, rows correspond to the different inputs to the system.
19 | Scores were computed using the scoring script provided in this repository (`code/score.py`).
20 |
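Following the invocation recorded in the CodaLab `metadata` files of this repository, the scorer can be run locally along these lines (directory paths are illustrative):

```sh
python3 code/codwoe_entrypoint.py score path/to/submissions/ \
    --reference_files_dir path/to/references/ \
    --output_file scores.txt
```
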
21 | ### Reverse Dictionary track
22 |
23 | | | MSE | Cosine | Ranking
24 | |------------|--------:|--------:|--------:
25 | | en SGNS | 0.91092 | 0.15132 | 0.49030
26 | | en char | 0.14776 | 0.79006 | 0.50218
27 | | en electra | 1.41287 | 0.84283 | 0.49849
28 | | es SGNS | 0.92996 | 0.20406 | 0.49912
29 | | es char | 0.56952 | 0.80634 | 0.49778
30 | | fr SGNS | 1.14050 | 0.19774 | 0.49052
31 | | fr char | 0.39480 | 0.75852 | 0.49945
32 | | fr electra | 1.15348 | 0.85629 | 0.49784
33 | | it SGNS | 1.12536 | 0.20430 | 0.47692
34 | | it char | 0.36309 | 0.72732 | 0.49663
35 | | ru SGNS | 0.57683 | 0.25316 | 0.49008
36 | | ru char | 0.13498 | 0.82624 | 0.49451
37 | | ru electra | 0.87358 | 0.72086 | 0.49120
38 |
39 |
40 | ### Definition Modeling track
41 |
42 | | | Sense-BLEU | Lemma-BLEU | MoverScore
43 | |------------|-----------:|-----------:|-----------:
44 | | en SGNS | 0.00125 | 0.00250 | 0.10339
45 | | en char | 0.00011 | 0.00022 | 0.08852
46 | | en electra | 0.00165 | 0.00215 | 0.08798
47 | | es SGNS | 0.01536 | 0.02667 | 0.20130
48 | | es char | 0.01505 | 0.02471 | 0.19933
49 | | fr SGNS | 0.00351 | 0.00604 | 0.18478
50 | | fr char | 0.00280 | 0.00706 | 0.18579
51 | | fr electra | 0.00219 | 0.00301 | 0.17391
52 | | it SGNS | 0.02591 | 0.04081 | 0.20527
53 | | it char | 0.00640 | 0.00919 | 0.15902
54 | | ru SGNS | 0.01520 | 0.02112 | 0.34716
55 | | ru char | 0.01313 | 0.01847 | 0.32307
56 | | ru electra | 0.01189 | 0.01457 | 0.33577
57 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 |
3 | ENV DEBIAN_FRONTEND=noninteractive
4 | RUN apt-get update && apt-get install -y \
5 | software-properties-common && \
6 | rm -rf /var/lib/apt/lists/*
7 | RUN add-apt-repository ppa:deadsnakes/ppa
8 |
9 | # set up python
10 | RUN apt-get update && apt-get install -y \
11 | apt-transport-https \
12 | iputils-ping \
13 | git \
14 | curl \
15 | build-essential \
16 | cmake \
17 | libhdf5-dev \
18 | swig \
19 | wget \
20 | python3.8 \
21 | python3.8-venv \
22 | python3.8-dev \
23 | python3-pip \
24 | python3-software-properties
25 |
26 | RUN curl https://bootstrap.pypa.io/get-pip.py | python3.8
27 |
28 | # Without this Python thinks we're ASCII and unicode chars fail
29 | ENV LANG C.UTF-8
30 |
31 | RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
32 | RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1
33 |
34 | # install libraries
35 | RUN pip3 install -U --no-cache-dir \
36 | absl-py==0.12.0 \
37 | cachetools==4.2.2 \
38 | certifi==2020.12.5 \
39 | chardet==4.0.0 \
40 | click==8.0.1 \
41 | cycler==0.10.0 \
42 | filelock==3.0.12 \
43 | google-auth==1.30.0 \
44 | google-auth-oauthlib==0.4.4 \
45 | grpcio==1.37.1 \
46 | huggingface-hub==0.0.12 \
47 | idna==2.10 \
48 | joblib==1.0.1 \
49 | kiwisolver==1.3.1 \
50 | Markdown==3.3.4 \
51 | matplotlib==3.4.2 \
52 | moverscore==1.0.3 \
53 | nltk==3.6.7 \
54 |     'numpy>=1.20.3' \
55 | oauthlib==3.1.0 \
56 | packaging==21.0 \
57 | Pillow==8.3.0 \
58 | portalocker==2.3.0 \
59 | protobuf==3.17.0 \
60 | pyasn1==0.4.8 \
61 | pyasn1-modules==0.2.8 \
62 | pyemd \
63 | pyparsing==2.4.7 \
64 | python-dateutil==2.8.1 \
65 | PyYAML==5.4.1 \
66 | regex==2022.1.18 \
67 | requests==2.25.1 \
68 | requests-oauthlib==1.3.0 \
69 | rsa==4.7.2 \
70 | sacremoses==0.0.45 \
71 | sentencepiece==0.1.96 \
72 | six==1.16.0 \
73 | tensorboard==2.5.0 \
74 | tensorboard-data-server==0.6.1 \
75 | tensorboard-plugin-wit==1.8.0 \
76 | tokenizers==0.8.1rc2 \
77 | torch==1.8.1 \
78 | tqdm==4.60.0 \
79 | transformers==3.1.0 \
80 | typing==3.7.4.3 \
81 | typing-extensions==3.10.0.0 \
82 | urllib3==1.26.4 \
83 | Werkzeug==2.0.1
84 |
85 | # the next line will patch moverscore so that it runs on cpu, rather than on your cuda:0 device.
86 | # comment this line if you have access to a GPU
87 | RUN find . -type f -name moverscore_v2.py -exec sed -i 's/cuda:0/cpu/g' {} \;
88 | RUN find . -type f -name moverscore_v2.py -exec sed -i '2 i\import os' {} \;
89 | RUN find . -type f -name moverscore_v2.py -exec sed -i "s/model_name = 'distilbert-base-uncased'/model_name = os.environ.get('MOVERSCORE_MODEL', 'distilbert-base-uncased')/g" {} \;
90 |
91 |
92 | RUN python3 -c "import nltk; nltk.download('punkt');"
93 | RUN python3 -c "import os; os.environ['MOVERSCORE_MODEL'] = 'distilbert-base-multilingual-cased' ; import moverscore_v2"
94 |
--------------------------------------------------------------------------------
/rankings/README.md:
--------------------------------------------------------------------------------
1 | # What is in this directory?
2 |
3 | This subdirectory contains three subdirectories.
4 | First is `submission_scores`, which lists the submissions we received and the scores they were attributed by the scoring program.
5 | Second is `submission_ranks`, which converts submission scores into ranks (i.e., how many submissions fared at least as well as this one).
6 | Last is `final_rankings`, which lists each user's best rank per metric, the average of these best ranks, and a nominal ranking per user, as listed below.
7 |
8 | We also include the python scripts we used to convert raw submission scores into official rankings.
9 |
10 | # Official rankings
11 |
12 | Below are the official rankings for the SemEval 2022 CODWOE Shared task.
13 |
14 | ### Definition Modeling track
15 |
16 | Below are the results for the Definition Modeling track.
17 |
18 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
19 | |---------------|--------:|--------:|--------:|--------:|--------:
20 | | Locchi | 8 | 6 | | 7 |
21 | | WENGSYX | 9 | 7 | 6 | 6 | 6
22 | | cunliang.kong | 3 | 2 | 3 | **1** | 2
23 | | IRB-NLP | 2 | **1** | **1** | 5 | 5
24 | | emukans | 5 | 4 | 4 | 4 | 3
25 | | guntis | 6 | | | |
26 | | lukechan1231 | 7 | 5 | 5 | 3 | 4
27 | | pzchen | 4 | 3 | 2 | 2 | **1**
28 | | talent404 | **1** | | | |
29 |
30 | ### Reverse Dictionary track
31 |
32 | Below are the results for the Reverse Dictionary track.
33 | There are separate rankings, based on which targets participants have submitted.
34 |
35 | #### A. SGNS targets
36 |
37 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
38 | |------------------|--------:|--------:|--------:|--------:|--------:
39 | | Locchi | 4 | | | 4 |
40 | | Nihed_Bendahman_ | 5 | 5 | 4 | 6 | 4
41 | | WENGSYX | **1** | 2 | 2 | 3 | **1**
42 | | MMG | | 3 | | |
43 | | chlrbgus321 | N/A | | | |
44 | | IRB-NLP | 3 | **1** | **1** | **1** | 2
45 | | pzchen | 2 | 4 | 3 | 2 | 3
46 | | the0ne | 7 | | | |
47 | | tthhanh | 8 | 7 | 6 | 7 | 6
48 | | zhwa3087 | 6 | 6 | 5 | 5 | 5
49 |
50 | #### B. ELECTRA targets
51 |
52 | | user / team | Rank EN | Rank FR | Rank RU
53 | |------------------|--------:|--------:|--------:
54 | | Locchi | 3 | |
55 | | Nihed_Bendahman_ | 2 | 2 | 4
56 | | WENGSYX | 4 | 4 | 2
57 | | IRB-NLP | 5 | 3 | 3
58 | | pzchen | **1** | **1** | **1**
59 | | the0ne | 6 | |
60 |
61 |
62 | #### C. Char-based targets
63 |
64 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
65 | |------------------|--------:|--------:|--------:|--------:|--------:
66 | | Locchi | **1** | | | 4 |
67 | | Nihed_Bendahman_ | 2 | 2 | 2 | 3 | 4
68 | | WENGSYX | 7 | 5 | 5 | 6 | 5
69 | | IRB-NLP | 4 | 3 | 4 | 2 | 2
70 | | pzchen | 3 | **1** | **1** | **1** | **1**
71 | | the0ne | 5 | | | |
72 | | zhwa3087 | 6 | 4 | 3 | 5 | 3
73 |
74 |
75 | # How were rankings computed?
76 |
77 | See the python scripts.
78 |
79 | We start by converting scalar scores into ranked scores: i.e., instead of considering the absolute value obtained by a submission, we count how many submissions fared at least as well as the current one.
80 | This is done so as to neutralize the fact that some metrics have to be maximized (e.g., cosine) whereas others ought to be minimized (e.g., MSE), and that metrics have different ranges (e.g., MSE is defined from 0 to infinity, but cosine from -1 to +1).
81 |
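As a minimal sketch of this conversion (with toy scores rather than actual submission data):

```python
# Toy illustration of the score-to-rank conversion in the make_rankings_*.py scripts.
scores = [0.91, 0.85, 0.85, 0.10]  # e.g., cosine similarities: higher is better

def to_rank(cell, all_scores, maximize):
    # A submission's rank is the number of submissions that fared at least as well,
    # so the best submission gets rank 1 and ties share a rank.
    if maximize:
        return sum(s >= cell for s in all_scores)
    return sum(s <= cell for s in all_scores)

print([to_rank(s, scores, maximize=True) for s in scores])  # [1, 3, 3, 4]
```
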
82 | We then take each user's best rank on each metric, and average these across the three metrics for each target, namely:
83 | - MSE, cosine and rank-cosine for each target architecture in the Reverse Dictionary track
84 | - S-BLEU, L-BLEU and MoverScore for the Definition Modeling track
85 |
86 | Finally, we manually converted the averages per target into nominal rankings.
87 |
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | # Dataset access
2 |
3 | **The datasets are available at the following page: [https://codwoe.atilf.fr/](https://codwoe.atilf.fr/).**
4 |
5 | # Dataset structure
6 |
7 | This document details the structure of the JSON dataset file we provide. More information is available on the competition website: [link](https://competitions.codalab.org/competitions/34022#participate-get_data).
8 |
9 |
10 | ## Brief Overview
11 |
12 | As an overview, the expected usage of the datasets is as follows:
13 | + In the Definition Modeling track, we expect participants to use the embeddings ("char", "sgns", "electra") to generate the associated definition ("gloss").
14 | + In the Reverse Dictionary track, we expect participants to use the definition ("gloss") to generate any of the associated embeddings ("char", "sgns", "electra").
15 |
16 |
17 | ## Dataset files structure
18 |
19 | Each dataset file corresponds to a data split (trial/train/dev/test) for one of the languages.
20 |
21 | Dataset files are in the JSON format. A dataset file contains a list of examples. Each example is a JSON dictionary, containing the following keys:
22 | + "id",
23 | + "gloss"
24 | + "sgns"
25 | + "char"
26 |
27 | The English, French and Russian datasets also contain an "electra" key.
28 |
29 | As a concrete instance, here is an example from the English training dataset:
30 | ```json
31 | {
32 | "id": "en.train.2",
33 | "gloss": "A vocal genre in Hindustani classical music",
34 | "sgns": [
35 | -0.0602365807,
36 | ...
37 | ],
38 | "char": [
39 | -0.3631578386,
40 | ...
41 | ],
42 | "electra": [
43 | -1.3904430866,
44 | ...
45 | ]
46 | },
47 | ```
48 |
49 | ## Description of contents
50 |
51 | The value associated to "id" tracks the language, data split and unique identifier for this example.
52 |
53 | The value associated to the "gloss" key is a definition, as you would find in a classical dictionary. It is to be used either the target in the Definition Modeling track, or asthe source in the Reverse Dictionary track.
54 |
55 | All other keys ("char", "sgns", "electra") correspond to embeddings, and the associated values are arrays of floats representing the components. They can all serve as targets for the Reverse Dictionary track.
56 | + "char" corresponds to character-based embeddings, computed using an auto-encoder on the spelling of a word.
57 | + "sgns" corresponds to skip-gram with negative sampling embeddings (aka. word2vec)
58 | + "electra" corresponds to Transformer-based contextualized embeddings.
59 |
60 |
61 | ## Using the dataset files
62 |
63 | Given that the data is in JSON format, it is straightforward to load it in Python:
64 |
65 | ```python
66 | import json
67 | with open(PATH_TO_DATASET, "r") as file_handler:
68 | dataset = json.load(file_handler)
69 | ```
70 |
71 | A more complete example for PyTorch is available in the git repository (see here: [link](https://git.atilf.fr/tmickus/codwoe/-/blob/master/code/data.py#L18)).
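
A minimal PyTorch wrapper over this format might look like the following sketch (this assumes the keys described above; the official implementation lives in `code/data.py`):

```python
import json

import torch
from torch.utils.data import Dataset


class JSONDataset(Dataset):
    """Minimal wrapper around one CODWOE JSON dataset file."""

    def __init__(self, path):
        with open(path, "r") as file_handler:
            self.items = json.load(file_handler)

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        item = self.items[index]
        # Embeddings are stored as plain lists of floats; convert to tensors.
        return item["id"], item["gloss"], torch.tensor(item["sgns"])


# e.g.: dataset = JSONDataset(PATH_TO_DATASET)
```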
72 |
73 | ## Expected output format
74 |
75 | During the evaluation phase, we will expect submissions to reconstruct the same JSON format.
76 |
77 | The test JSON files used as input will be separate for each track. They will contain the "id" key, and either the "gloss" key (in the reverse dictionary track) or the embedding keys (the "char" and "sgns" keys, plus the "electra" key in EN/FR/RU, in the definition modeling track).
78 |
79 | In the definition modeling track, participants should construct JSON files that contain at least the two following keys:
80 | + the original "id"
81 | + their generated "gloss"
82 |
83 | In the reverse dictionary track, participants should construct JSON files that contain at least the two following keys:
84 | + the original "id"
85 | + any of the valid embeddings ("char", "sgns", or "electra" key in EN/FR/RU)
86 |
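As an illustration, a single item in a reverse dictionary submission targeting "sgns" could look as follows (the id follows the pattern expected by `check_output.py`; the embedding values are made up and truncated for display):

```json
{
    "id": "en.test.revdict.1",
    "sgns": [
        -0.0602365807,
        0.1253210157
    ]
}
```
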
87 | Other keys can be added. More details concerning the evaluation procedure are available here: [link](https://competitions.codalab.org/competitions/34022#learn_the_details-evaluation).
88 |
89 | ## License Information
90 |
91 | The complete datasets, embedding architectures and embedding models will be made publicly available after the evaluation phase under a CC-BY-SA license. Please link to the competition website page ([link](https://competitions.codalab.org/competitions/34022)) and cite our upcoming task description paper if you use these datasets in your own work.
92 |
93 | Dictionary data has been extracted from dumps provided by [Sérasset (2014)](http://kaiko.getalp.org/about-dbnary/). Embeddings were trained specifically for this shared task; all details will be made available in the task description paper.
94 |
95 |
--------------------------------------------------------------------------------
/codalab/competition/terms_and_conditions.html:
--------------------------------------------------------------------------------
1 | Terms and Conditions
2 |
3 | Participants should generally adopt a spirit of good sportsmanship and
4 | avoid any unfair or otherwise unconscionable conduct. We provide the following
5 | terms and conditions to clearly delineate the guidelines to which the
6 | participants are expected to adhere. Organizers reserve the right to amend in
7 | any way the following terms, in which case modifications will be advertised
8 | through the shared task mailing list and the CodaLab forums.
9 |
10 | Participants may contact the organizers if any of the following terms
11 | raises their concern.
12 |
13 |
14 | Participation in the competition: Any interested person may freely
15 | participate in the competition. By participating in the competition, you agree
16 | to the terms and conditions in their entirety, without amendment or provision.
17 | By participating in the competition, you understand and agree that your scores
18 | and submissions will be made public.
19 |
20 | Scores and submissions are understood as any direct or indirect contributions
21 | to this site or the shared task organizers, such as, but not limited to:
22 | results of automatic scoring programs; manual, qualitative and quantitative
23 | assessments of the data submitted; etc.
24 |
25 | Participants may create teams. Participants may not be part of more than one
26 | team. Teams and participants not belonging to any team must create exactly one
27 | account on the CodaLab competition. Team composition may not be changed once
28 | the evaluation phase starts.
29 |
30 |
31 | Scoring of submissions: Organizers are under no obligation to release
32 | scores. Official scores may be withheld, amended or removed if organizers
33 | judge the submission incomplete, erroneous, deceptive, or violating the letter
34 | or spirit of the competition's rules. Inclusion of a submission's scores is
35 | not an endorsement of a team or individual's submission, system, or science.
36 |
37 | Up to 50 submissions will be allowed during the evaluation phase. Scores will
38 | not be visible on the leaderboards until the evaluation phase is over.
39 |
40 | Submission files will be grouped according to the track, language, and in the
41 | case of the reverse dictionary track, the embedding architecture targeted; the
42 | last submission file per group will be understood as the team's or
43 | participant's definitive submission and ranked as such in the task description
44 | paper.
45 |
46 |
47 | Data usage: The provided data should be used responsibly and ethically.
48 | Do not attempt to misuse it in any way, including, but not limited to,
49 | reconstructing test sets, any non-scientific use of the data, or any other
50 | unconscionable usage of the data.
51 |
52 | During the course of the shared task, participants are not allowed to use
53 | any external data. This is to ensure that results are immediately
54 | comparable. Participants will be allowed to use external data once the
55 | evaluation phase is over for system review. All data will be released at the
56 | end of the evaluation phase.
57 |
58 |
59 | Submission of system description papers: Participants having made at
60 | least one submission during the evaluation phase will be invited to submit a
61 | paper describing their system. As a requirement, a link to the code of
62 | systems being described will be made available to organizers or the public at
63 | large. Participants submitting a system description paper will also be
64 | asked to review papers submitted by their peers in a single-blind process.
65 |
66 | We further encourage system description papers to include a manual analysis
67 | of their systems' results and productions. The presence and quality of such
68 | an analysis will be assessed during the review process. The task description
69 | paper will also devote a significant amount of space to highlighting
70 | outstanding manual evaluations conducted by participants.
71 |
72 | Collection of system productions: Participants having made at least one
73 | submission during the evaluation phase will be invited to submit their
74 | systems' outputs to a dataset of system productions. The purpose of this
75 | collection of system productions will solely be to propose them as a shared
76 | task for upcoming text generation evaluation campaigns.
77 |
78 |
79 | Funding Acknowledgments: This shared task was supported by a public
80 | grant overseen by the French National Research Agency (ANR) as part of the
81 | "Investissements d'Avenir" program: Idex Lorraine Université
82 | d'Excellence (reference: ANR-15-IDEX-0004).
83 |
84 | Future sponsors, if any, will be appended to this section.
85 |
86 |
--------------------------------------------------------------------------------
/code/check_output.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import collections
3 | import json
4 | import logging
5 | import pathlib
6 | import sys
7 |
8 | logger = logging.getLogger(pathlib.Path(__file__).name)
9 | logger.setLevel(logging.DEBUG)
10 | handler = logging.StreamHandler(sys.stdout)
11 | handler.setFormatter(
12 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
13 | )
14 | logger.addHandler(handler)
15 |
16 |
17 | def get_parser(
18 | parser=argparse.ArgumentParser(
19 | description="Verify the output format of a submission"
20 | ),
21 | ):
22 | parser.add_argument("submission_file", type=pathlib.Path, help="file to check")
23 | return parser
24 |
25 |
26 | def main(filename):
27 | try:
28 | with open(filename, "r") as istr:
29 | items = json.load(istr)
30 |     except Exception:
31 | raise ValueError(f'File "{filename}": could not open, submission will fail.')
32 | else:
33 | # expected_keys = {"id", "gloss"}
34 | for item in items:
35 | # keys_not_found = expected_keys - set(item.keys())
36 | if "id" not in item:
37 | raise ValueError(
38 | f'File "{filename}": one or more items do not contain an id, submission will fail.'
39 | )
40 | ids = sorted([item["id"] for item in items])
41 | ids = [i.split(".") for i in ids]
42 | langs = {i[0] for i in ids}
43 | if len(langs) != 1:
44 | raise ValueError(
45 | f'File "{filename}": ids do not identify a unique language, submission will fail.'
46 | )
47 | tracks = {i[-2] for i in ids}
48 | if len(tracks) != 1:
49 | raise ValueError(
50 | f'File "{filename}": ids do not identify a unique track, submission will fail.'
51 | )
52 | track = next(iter(tracks))
53 | if track not in ("revdict", "defmod"):
54 | raise ValueError(
55 | f'File "{filename}": unknown track identified {track}, submission will fail.'
56 | )
57 | lang = next(iter(langs))
58 | if lang not in ("en", "es", "fr", "it", "ru"):
59 | raise ValueError(
60 | f'File "{filename}": unknown language {lang}, submission will fail.'
61 | )
62 | serials = list(sorted({int(i[-1]) for i in ids}))
63 | if serials != list(range(1, len(ids) + 1)):
64 | raise ValueError(
65 | f'File "{filename}": ids do not identify all items in dataset, submission will fail.'
66 | )
67 | if track == "revdict":
68 | vec_archs = set(items[0].keys()) - {
69 | "id",
70 | "gloss",
71 | "word",
72 | "pos",
73 | "concrete",
74 | "example",
75 | "f_rnk",
76 | "counts",
77 | "polysemous",
78 | }
79 | if len(vec_archs) == 0:
80 | raise ValueError(
81 | f'File "{filename}": no vector architecture was found, revdict submission will fail.'
82 | )
83 | for item in items:
84 | if not all(v in item for v in vec_archs):
85 | raise ValueError(
86 | f'File "{filename}": some items do not contain all the expected vectors, revdict submission will fail.'
87 | )
88 | if len(vec_archs - {"sgns", "char", "electra"}):
89 | raise ValueError(
90 | f'File "{filename}": unknown vector architecture(s), revdict submission will fail.'
91 | )
92 | if track == "defmod" and any("gloss" not in i for i in items):
93 | raise ValueError(
94 | f'File "{filename}": some items do not contain a gloss, defmod submission will fail.'
95 | )
96 |
97 | ok_message = (
98 | f'File "{filename}": no problems were identified.\n'
99 | + f"The submission will be understood as follows:\n"
100 | + f"\tSubmission on track {track} for language {lang}, {len(ids)} predictions.\n"
101 | )
102 | if track == "revdict":
103 | vec_archs = tuple(sorted(vec_archs))
104 | ok_message += (
105 | f'\tSubmission predicts these embeddings: {", ".join(vec_archs)}.'
106 | )
107 | else:
108 | vec_archs = None
109 | logger.debug(ok_message)
110 | CheckSummary = collections.namedtuple(
111 | "CheckSummary", ["filename", "track", "lang", "vec_archs"]
112 | )
113 | return CheckSummary(filename, track, lang, vec_archs)
114 |
115 |
116 | if __name__ == "__main__":
117 | main(get_parser().parse_args().submission_file)
118 |
--------------------------------------------------------------------------------
/baseline_archs/code/check_output.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import collections
3 | import json
4 | import logging
5 | import pathlib
6 | import sys
7 |
8 | logger = logging.getLogger(pathlib.Path(__file__).name)
9 | logger.setLevel(logging.DEBUG)
10 | handler = logging.StreamHandler(sys.stdout)
11 | handler.setFormatter(
12 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
13 | )
14 | logger.addHandler(handler)
15 |
16 |
17 | def get_parser(
18 | parser=argparse.ArgumentParser(
19 | description="Verify the output format of a submission"
20 | ),
21 | ):
22 | parser.add_argument("submission_file", type=pathlib.Path, help="file to check")
23 | return parser
24 |
25 |
26 | def main(filename):
27 | try:
28 | with open(filename, "r") as istr:
29 | items = json.load(istr)
30 |     except Exception:
31 | raise ValueError(f'File "{filename}": could not open, submission will fail.')
32 | else:
33 | # expected_keys = {"id", "gloss"}
34 | for item in items:
35 | # keys_not_found = expected_keys - set(item.keys())
36 | if "id" not in item:
37 | raise ValueError(
38 | f'File "{filename}": one or more items do not contain an id, submission will fail.'
39 | )
40 | ids = sorted([item["id"] for item in items])
41 | ids = [i.split(".") for i in ids]
42 | langs = {i[0] for i in ids}
43 | if len(langs) != 1:
44 | raise ValueError(
45 | f'File "{filename}": ids do not identify a unique language, submission will fail.'
46 | )
47 | tracks = {i[-2] for i in ids}
48 | if len(tracks) != 1:
49 | raise ValueError(
50 | f'File "{filename}": ids do not identify a unique track, submission will fail.'
51 | )
52 | track = next(iter(tracks))
53 | if track not in ("revdict", "defmod"):
54 | raise ValueError(
55 | f'File "{filename}": unknown track identified {track}, submission will fail.'
56 | )
57 | lang = next(iter(langs))
58 | if lang not in ("en", "es", "fr", "it", "ru"):
59 | raise ValueError(
60 | f'File "{filename}": unknown language {lang}, submission will fail.'
61 | )
62 | serials = list(sorted({int(i[-1]) for i in ids}))
63 | if serials != list(range(1, len(ids) + 1)):
64 | raise ValueError(
65 | f'File "{filename}": ids do not identify all items in dataset, submission will fail.'
66 | )
67 | if track == "revdict":
68 | vec_archs = set(items[0].keys()) - {
69 | "id",
70 | "gloss",
71 | "word",
72 | "pos",
73 | "concrete",
74 | "example",
75 | "f_rnk",
76 | "counts",
77 | "polysemous",
78 | }
79 | if len(vec_archs) == 0:
80 | raise ValueError(
81 | f'File "{filename}": no vector architecture was found, revdict submission will fail.'
82 | )
83 | for item in items:
84 | if not all(v in item for v in vec_archs):
85 | raise ValueError(
86 | f'File "{filename}": some items do not contain all the expected vectors, revdict submission will fail.'
87 | )
88 | if len(vec_archs - {"sgns", "char", "electra"}):
89 | raise ValueError(
90 | f'File "{filename}": unknown vector architecture(s), revdict submission will fail.'
91 | )
92 | if track == "defmod" and any("gloss" not in i for i in items):
93 | raise ValueError(
94 | f'File "{filename}": some items do not contain a gloss, defmod submission will fail.'
95 | )
96 |
97 | ok_message = (
98 | f'File "{filename}": no problems were identified.\n'
99 | + f"The submission will be understood as follows:\n"
100 | + f"\tSubmission on track {track} for language {lang}, {len(ids)} predictions.\n"
101 | )
102 | if track == "revdict":
103 | vec_archs = tuple(sorted(vec_archs))
104 | ok_message += (
105 | f'\tSubmission predicts these embeddings: {", ".join(vec_archs)}.'
106 | )
107 | else:
108 | vec_archs = None
109 | logger.debug(ok_message)
110 | CheckSummary = collections.namedtuple(
111 | "CheckSummary", ["filename", "track", "lang", "vec_archs"]
112 | )
113 | return CheckSummary(filename, track, lang, vec_archs)
114 |
115 |
116 | if __name__ == "__main__":
117 | main(get_parser().parse_args().submission_file)
118 |
--------------------------------------------------------------------------------
/code/models.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 | import data
8 |
9 |
10 | class PositionalEncoding(nn.Module):
11 | """From PyTorch"""
12 |
13 | def __init__(self, d_model, dropout=0.1, max_len=4096):
14 | super(PositionalEncoding, self).__init__()
15 | self.dropout = nn.Dropout(p=dropout)
16 | pe = torch.zeros(max_len, d_model)
17 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
18 | div_term = torch.exp(
19 | torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
20 | )
21 | pe[:, 0::2] = torch.sin(position * div_term)
22 | pe[:, 1::2] = torch.cos(position * div_term)
23 | pe = pe.unsqueeze(0).transpose(0, 1)
24 | self.register_buffer("pe", pe)
25 |
26 | def forward(self, x):
27 | x = x + self.pe[: x.size(0)]
28 | return self.dropout(x)
29 |
30 |
31 | class DefmodModel(nn.Module):
32 | """A transformer architecture for Definition Modeling."""
33 |
34 | def __init__(
35 | self, vocab, d_model=256, n_head=4, n_layers=4, dropout=0.3, maxlen=256
36 | ):
37 | super(DefmodModel, self).__init__()
38 | self.d_model = d_model
39 | self.padding_idx = vocab[data.PAD]
40 | self.eos_idx = vocab[data.EOS]
41 | self.maxlen = maxlen
42 |
43 | self.embedding = nn.Embedding(len(vocab), d_model, padding_idx=self.padding_idx)
44 | self.positional_encoding = PositionalEncoding(
45 | d_model, dropout=dropout, max_len=maxlen
46 | )
47 | encoder_layer = nn.TransformerEncoderLayer(
48 | d_model=d_model, nhead=n_head, dropout=dropout, dim_feedforward=d_model * 2
49 | )
50 | self.transformer_encoder = nn.TransformerEncoder(
51 | encoder_layer, num_layers=n_layers
52 | )
53 | self.v_proj = nn.Linear(d_model, len(vocab))
54 | # initializing weights
55 | for name, param in self.named_parameters():
56 | if param.dim() > 1:
57 | nn.init.xavier_uniform_(param)
58 | elif "bias" in name:
59 | nn.init.zeros_(param)
60 | else: # gain parameters of the layer norm
61 | nn.init.ones_(param)
62 |
63 | def generate_square_subsequent_mask(self, sz):
64 | "from Pytorch"
65 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
66 | mask = (
67 | mask.float()
68 | .masked_fill(mask == 0, float("-inf"))
69 | .masked_fill(mask == 1, float(0.0))
70 | )
71 | return mask
72 |
73 | def forward(self, vector, input_sequence=None):
74 | device = next(self.parameters()).device
75 | embs = self.embedding(input_sequence)
76 | seq = torch.cat([vector.unsqueeze(0), embs], dim=0)
77 | src = self.positional_encoding(seq)
78 | src_mask = self.generate_square_subsequent_mask(src.size(0)).to(device)
79 | src_key_padding_mask = torch.cat(
80 | [
81 | torch.tensor([[False] * input_sequence.size(1)]).to(device),
82 | (input_sequence == self.padding_idx),
83 | ],
84 | dim=0,
85 | ).t()
86 | transformer_output = self.transformer_encoder(
87 | src, mask=src_mask, src_key_padding_mask=src_key_padding_mask
88 | )
89 | v_dist = self.v_proj(transformer_output)
90 | return v_dist
91 |
92 | @staticmethod
93 | def load(file):
94 | return torch.load(file)
95 |
96 | def save(self, file):
97 | torch.save(self, file)
98 |
99 | def pred(self, vector):
100 | generated_symbols = []
101 | device = next(self.parameters()).device
102 | batch_size = vector.size(0)
103 | src = vector.unsqueeze(0)
104 | has_stopped = torch.tensor([False] * batch_size).to(device)
105 | src_key_padding_mask = torch.tensor([[False] * batch_size]).to(device)
106 | for step_idx in range(self.maxlen):
107 | src_mask = self.generate_square_subsequent_mask(src.size(0)).to(device)
108 | src_pe = self.positional_encoding(src)
109 | transformer_output = self.transformer_encoder(
110 | src_pe, mask=src_mask, src_key_padding_mask=src_key_padding_mask.t()
111 | )[-1]
112 | v_dist = self.v_proj(transformer_output)
113 | new_symbol = v_dist.argmax(-1)
114 | new_symbol = new_symbol.masked_fill(has_stopped, self.padding_idx)
115 | generated_symbols.append(new_symbol)
116 | src_key_padding_mask = torch.cat(
117 | [src_key_padding_mask, has_stopped.unsqueeze(0)], dim=0
118 | )
119 | has_stopped = has_stopped | (new_symbol == self.eos_idx)
120 | src = torch.cat([src, self.embedding(new_symbol).unsqueeze(0)], dim=0)
121 | if has_stopped.all():
122 | break
123 | output_sequence = torch.stack(generated_symbols, dim=0)
124 | return output_sequence
125 |
126 |
127 | class RevdictModel(nn.Module):
128 |     """A transformer architecture for the Reverse Dictionary track."""
129 |
130 | def __init__(
131 | self, vocab, d_model=256, n_head=4, n_layers=4, dropout=0.3, maxlen=512
132 | ):
133 | super(RevdictModel, self).__init__()
134 | self.d_model = d_model
135 | self.padding_idx = vocab[data.PAD]
136 | self.eos_idx = vocab[data.EOS]
137 | self.maxlen = maxlen
138 |
139 | self.embedding = nn.Embedding(len(vocab), d_model, padding_idx=self.padding_idx)
140 | self.positional_encoding = PositionalEncoding(
141 | d_model, dropout=dropout, max_len=maxlen
142 | )
143 | encoder_layer = nn.TransformerEncoderLayer(
144 | d_model=d_model, nhead=n_head, dropout=dropout, dim_feedforward=d_model * 2
145 | )
146 | self.transformer_encoder = nn.TransformerEncoder(
147 | encoder_layer, num_layers=n_layers
148 | )
149 | self.dropout = nn.Dropout(p=dropout)
150 | self.e_proj = nn.Linear(d_model, d_model)
151 | for name, param in self.named_parameters():
152 | if param.dim() > 1:
153 | nn.init.xavier_uniform_(param)
154 | elif "bias" in name:
155 | nn.init.zeros_(param)
156 | else: # gain parameters of the layer norm
157 | nn.init.ones_(param)
158 |
159 | def forward(self, gloss_tensor):
160 | src_key_padding_mask = gloss_tensor == self.padding_idx
161 | embs = self.embedding(gloss_tensor)
162 | src = self.positional_encoding(embs)
163 | transformer_output = self.dropout(
164 | self.transformer_encoder(src, src_key_padding_mask=src_key_padding_mask.t())
165 | )
166 | summed_embs = transformer_output.masked_fill(
167 | src_key_padding_mask.unsqueeze(-1), 0
168 | ).sum(dim=0)
169 | return self.e_proj(F.relu(summed_embs))
170 |
171 | @staticmethod
172 | def load(file):
173 | return torch.load(file)
174 |
175 | def save(self, file):
176 | torch.save(self, file)
177 |
--------------------------------------------------------------------------------
/codalab/competition/overview.html:
--------------------------------------------------------------------------------
1 |
2 | CODWOE: COmparing Dictionaries and WOrd Embeddings
3 |
4 |
5 | The CODWOE shared task invites you to compare two types of semantic
6 | descriptions: dictionary glosses and word embedding representations. Are these
7 | two types of representation equivalent? Can we generate one from the other? To
8 | study this question, we propose two subtracks: a definition modeling
9 | track (Noraset et
10 | al., 2017), where participants have to generate glosses from
11 | vectors, and a reverse dictionary track
12 | (Hill et al., 2016), where
13 | participants have to generate vectors from glosses.
14 |
15 |
16 | These two tracks display a number of interesting characteristics. Definition
17 | modeling is a vector-to-sequence task, while the reverse dictionary task is a
18 | sequence-to-vector task—and you know that kind of thing gets NLP people
19 | swearing out loud. These tasks are also useful for explainable AI, since they
20 | involve converting human-readable data into machine-readable data and back.
21 |
22 |
23 | Dictionaries contain definitions, such as
24 | Merriam
25 | Webster's:
26 |
27 |
28 | cod: any of various bottom-dwelling fishes (family Gadidae, the cod
29 | family) that usually occur in cold marine waters and often have barbels and
30 | three dorsal fins
31 |
32 |
33 | The task of definition modeling consists in using the vector representation of
34 | co⃗d to produce the associated gloss, "any of various
35 | bottom-dwelling fishes (family Gadidae, the cod family) that usually occur in
36 | cold marine waters and often have barbels and three dorsal fins". The
37 | reverse dictionary task is the mathematical inverse: reconstruct an embedding
38 | co⃗d from the corresponding gloss.
39 |
40 |
41 | Beyond their obvious usefulness for explainable AI, these two tracks also
42 | have a theoretical significance: glosses and word embeddings are both
43 | representations of meaning, and converting between them therefore amounts to
44 | translating between two distinct, non-formal semantic representations. From a
45 | practical point of view, the ability to infer word embeddings from dictionary
46 | resources, or dictionaries from large unannotated corpora, would prove a boon
47 | for many under-resourced languages.
50 |
51 |
52 | Dive right in and get started!
53 |
54 |
55 | The data can be retrieved from
56 | our git
57 | repository. See the related codalab
58 | page for more details as well.
59 |
60 |
61 | To help participants get started, we provide a basic architecture for both
62 | tracks, a submission format checker, and the scoring script. All of this is
63 | available in our public git
64 | repository.
65 |
66 |
67 | Keep in mind that we do not allow external data! The point is to keep
68 | results linguistically significant and easily comparable. For all details on
69 | how we will evaluate submissions, check the relevant
70 | codalab page.
71 |
72 |
73 |
74 | What we are fishing for with this shared task
75 |
76 |
77 | Rather than focusing strictly on getting the highest scores on a benchmark, we
78 | encourage participants to approach this shared task as a collaborative
79 | research question: how should we compare two vastly different types of
80 | semantic representations such as dictionaries and word embeddings? What
81 | caveats are there? In fact, we already have a few questions we look forward to
82 | studying at the end of this shared task:
83 |
84 |
85 | -
86 | Do all architectures yield comparable results? Transformers, for
87 | instance, are generally hard to tune, require large amounts of data to train
88 | and have no default way of being primed with a vector: how will they fare on
89 | our two tracks?
90 |
91 | -
92 | What are the effects of combining different inputs? Do multilingual
93 | models fare better than monolingual models? Does handling both tracks with
94 | the same model help or hinder results?
95 |
96 | -
97 | Do contextual embeddings help to define polysemous words? Most
98 | approaches that use contextual embeddings in downstream applications rely on
99 | fine-tuning. Will contextual embeddings used as features also prove helpful?
100 |
101 |
102 |
103 | These are but a few questions that we are interested in—do come up with
104 | your own to test during this shared task! To encourage participants to adopt
105 | this mindset, here are a few key elements of this shared task:
106 |
107 |
108 | -
109 | data from 5 languages (EN, ES, FR, IT, RU) and from multiple
110 | embedding architectures, both static and contextual, all trained on
111 | comparable corpora
112 |
113 | -
114 | a richly annotated trial dataset, which will be useful for the manual
115 | evaluation of your systems
116 |
117 | -
118 | usage of external resources is not allowed, to ensure that all
119 | submissions are comparable
120 |
121 | -
122 | a strong focus on manual analyses of a submitted model’s behavior
123 | during the reviewing process
124 |
125 |
126 |
127 | As is usual for SemEval tasks, we will release all data at the end of the
128 | shared task. Depending on participants’ consent, we also plan to collect the
129 | productions of all models and reuse them in a future evaluation campaign.
130 |
131 |
132 |
133 | Shared task timeline (this too shall bass)
134 |
135 |
136 | Here are the key dates participants should keep in mind. Do note that these
137 | are subject to change.
138 |
139 | -
140 | September 3, 2021: Training data & development data made available
141 |
142 | -
143 | January 10, 2022: Evaluation data made available & evaluation start
144 |
145 | -
146 | January 31, 2022: Evaluation end
147 |
148 | -
149 | February 23, 2022: Paper submission due
150 |
151 | -
152 | March 31, 2022: Notification to authors
153 |
154 |
155 | Camera-ready due date and SemEval 2022 workshops will be announced at a later
156 | date.
157 |
158 |
159 |
160 | You have an issue? You need kelp? Get in touch!
161 |
162 |
163 | There’s a google group for all prospective participants: check it out at
164 |
165 | semeval2022-dictionaries-and-word-embeddings@googlegroups.com. You can
166 | also reach the organizers directly at
167 | tmickus@atilf.fr; make sure to mention the SemEval task in the email
168 | subject.
169 |
170 |
--------------------------------------------------------------------------------
/codalab/competition/evaluation.html:
--------------------------------------------------------------------------------
1 | Evaluation Criteria
2 |
3 |
4 | The evaluation script is available on our
5 |
6 | git repository for reference. Note that the complete dataset is
7 | required to run all the metrics. Metrics requiring the full dataset are
8 | indicated as such in the list below. The complete dataset will be made
9 | available at the end of the competition.
10 |
11 |
12 | Participants may not use any external resource. This requirement is to
13 | ensure that all submissions are easily comparable. We will ask participants
14 | planning to submit a system description paper to forward a link to their code.
15 |
16 |
17 | Participants will also be invited to contribute their systems' outputs to a
18 | dataset of system productions. The purpose of this collection of system
19 | productions is to propose them as a shared task for upcoming text generation
20 | evaluation campaigns.
21 |
22 |
23 | Metrics for the definition modeling track
24 |
25 | Definition modeling submissions are evaluated using three metrics:
26 |
27 |
28 | -
29 | a MoverScore, appearing as MvSc. on the leaderboards; it is
30 | computed using the original
31 | implementation of
32 | Zhao et al. (2019).
33 |
34 | -
35 | a BLEU score, appearing as S-BLEU on the leaderboards. The
36 | S here stands for "sense-level", as it is computed using the target
37 | gloss as the sole reference for the production. We use the
38 | NLTK
39 | implementation.
40 |
41 | -
42 | a lemma-level BLEU score, appearing as L-BLEU on the
43 | leaderboards. Concretely, we compute the BLEU score between a given
44 | production and every gloss sharing the same word and part of speech, then
45 | select the maximum score among these. We introduce this score as some definition
46 | modeling examples share the same input (character-based embedding or
47 | word2vec representation) and yet have different targets. The complete
48 | dataset, which will be made available at the end of the competition, is
49 | required to group entries per lemma. Again, we use the
50 | NLTK
51 | implementation (a minimal sketch of both BLEU variants is given below this list).
52 |
53 |
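To make the two BLEU variants concrete, here is a minimal sketch of how they
relate, using hypothetical glosses and plain whitespace tokenization (the
scoring script proper relies on NLTK's word_tokenize):

```python
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

smooth = SmoothingFunction().method4  # avoids zero scores on short glosses

production = "a large cold-water fish".split()
target = "any of various bottom-dwelling fishes of the cod family".split()
# Reference glosses sharing the same word and part of speech as the target:
lemma_group = [target, "a fish of the family Gadidae".split()]

# Sense-level BLEU: the target gloss is the sole reference.
s_bleu = sentence_bleu([target], production, smoothing_function=smooth)
# Lemma-level BLEU: best score against any gloss of the same (word, POS) pair.
l_bleu = max(
    sentence_bleu([ref], production, smoothing_function=smooth)
    for ref in lemma_group
)
assert l_bleu >= s_bleu  # the target gloss itself is part of the lemma group
```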
54 |
55 | Scoring a definition modeling submission using MoverScore on CPU takes some
56 | time (15min or more). Results may not be available immediately upon
57 | submission.
58 |
59 |
60 | Scores for distinct languages have different entries in the leaderboards, and
61 | will correspond to distinct official rankings in the task paper.
62 |
63 |
64 | Submissions to the definition modeling track must consist of a ZIP archive
65 | containing one or more JSON files. These JSON files must contain a list of
66 | JSON objects, each of which must at least contain two keys: "id" and "gloss".
67 | The id key is used to match submissions with references. The gloss key should
68 | map to the string production to be evaluated. See our
69 |
70 | git repository for an example architecture that can output the correct
71 | JSON format.
72 |
73 |
74 | To have your outputs scored, create a ZIP archive containing all the files you
75 | wish to submit, and upload it on CodaLab during the Evaluation phase. You can
76 | submit files for both tracks (definition modeling and reverse dictionary) at
77 | once in a single ZIP archive. Make sure that setups are unique: do not include
78 | two JSON files containing predictions for the same pair of track and language.
79 |
80 |
81 | Do not attempt to submit glosses for different languages with a single JSON
82 | submission file. This will fail. Instead, make distinct submission files per
83 | language.
84 |
85 |
86 | We strongly encourage you to check the format of your submission using our
87 |
88 | format checker before submitting to CodaLab. This script will also
89 | summarize how your submission will be understood by the scoring program.
90 |
91 |
92 | Metrics for the reverse dictionary track
93 |
94 | Reverse dictionary submissions are evaluated using three metrics:
95 |
96 |
97 | -
98 | mean squared error between the submission's reconstructed embedding
99 | and the reference embedding
100 |
101 | -
102 | cosine similarity between the submission's reconstructed embedding
103 | and the reference embedding
104 |
105 | -
106 | cosine-based ranking between the submission's reconstructed embedding
107 | and the reference embedding; i.e., for each item, how many reference
108 | embeddings across the test set are at least as close (by cosine) to the
109 | reconstructed embedding as its own reference is (see the sketch below this list).
110 |
111 |
112 |
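In essence, the ranking metric is computed as in the following sketch, which
mirrors the rank_cosine function of our scoring script:

```python
import torch
import torch.nn.functional as F

def rank_cosine(preds: torch.Tensor, targets: torch.Tensor) -> float:
    """Mean rank of each reference among all targets, normalized to [0, 1]."""
    # pairwise cosine similarities between reconstructions and references
    assocs = F.normalize(preds) @ F.normalize(targets).T
    # similarity of each reconstruction with its own reference (the diagonal)
    refs = torch.diagonal(assocs, 0).unsqueeze(1)
    # per item, count references at least as similar as the true one, then
    # average across items; lower is better
    ranks = (assocs >= refs).sum(1).float().mean().item()
    return ranks / preds.size(0)
```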
113 | Scores for distinct embeddings and languages have different entries in the
114 | leaderboards, and will correspond to distinct official rankings in the task
115 | paper.
116 |
117 |
118 | Submissions to the reverse dictionary track must consist of a ZIP archive
119 | containing one or more JSON files. These JSON files must contain a list of
120 | JSON objects, each of which must at least contain two keys: "id" and one among
121 | "sgns", "char" or "electra", identifying which architecture your submission
122 | tries to reconstruct. The "id" key is used to match submissions with
123 | references. The other key(s) should map to the vector reconstruction to be
124 | evaluated, as a list of float components. See our
125 |
126 | git repository for an example architecture that can output the correct
127 | JSON format.
128 |
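For instance (hypothetical ids; the vectors are truncated here for
readability, but must have the full dimensionality of the target embeddings):

```json
[
    {"id": "en.test.revdict.1", "sgns": [0.0217, -0.1213, 0.0864]},
    {"id": "en.test.revdict.2", "sgns": [0.0049, 0.0170, -0.0295]}
]
```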
129 |
130 | To have your outputs scored, create a ZIP archive containing all the files you
131 | wish to submit, and upload it on CodaLab during the Evaluation phase. You can
132 | submit files for both tracks (reverse dictionary and definition modeling) at
133 | once in a single ZIP archive. Make sure that setups are unique: do not include
134 | two JSON files containing predictions for the same configuration of track,
135 | language and embedding architecture.
136 |
137 |
138 | Do not attempt to submit embeddings for different languages in a single JSON
139 | submission. This will fail. Instead, make distinct submission files per
140 | language. You may however group reconstructions for multiple architectures in
141 | a single submission file.
142 |
143 |
144 | We strongly encourage you to check the format of your submission using our
145 |
146 | format checker before submitting to CodaLab. This script will also
147 | summarize how your submission will be understood by the scoring program.
148 |
149 |
150 | Manual evaluations
151 |
152 | We very strongly encourage participants to make use of the trial dataset for
153 | running manual evaluations of their systems' productions. The presence of a
154 | manual evaluation in system descriptions will be taken into account during the
155 | reviewing process.
156 |
157 |
--------------------------------------------------------------------------------
/code/data.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from itertools import count
3 | import json
4 | import random
5 |
6 | import torch
7 | from torch.nn.utils.rnn import pad_sequence
8 | from torch.utils.data import DataLoader, Dataset, Sampler
9 |
10 | BOS = "<bos>"  # special tokens: any four distinct strings work; they only serve as vocab keys
11 | EOS = "<eos>"
12 | PAD = "<pad>"
13 | UNK = "<unk>"
14 |
15 | SUPPORTED_ARCHS = ("sgns", "char")
16 |
17 | # A dataset is a container object for the actual data
18 | class JSONDataset(Dataset):
19 | """Reads a CODWOE JSON dataset"""
20 |
21 | def __init__(self, file, vocab=None, freeze_vocab=False, maxlen=256):
22 | """
23 | Construct a torch.utils.data.Dataset compatible with torch data API and
24 | codwoe data.
25 | args: `file` the path to the dataset file
26 | `vocab` a dictionary mapping strings to indices
27 | `freeze_vocab` whether to update vocabulary, or just replace unknown items with OOV token
28 | `maxlen` the maximum number of tokens per gloss
29 | """
30 | if vocab is None:
31 | self.vocab = defaultdict(count().__next__)
32 | else:
33 | self.vocab = defaultdict(count(len(vocab)).__next__)
34 | self.vocab.update(vocab)
35 | pad, eos, bos, unk = (
36 | self.vocab[PAD],
37 | self.vocab[EOS],
38 | self.vocab[BOS],
39 | self.vocab[UNK],
40 | )
41 | if freeze_vocab:
42 | self.vocab = dict(vocab)
43 | with open(file, "r") as istr:
44 | self.items = json.load(istr)
45 | # preparse data
46 | for json_dict in self.items:
47 | # in definition modeling test datasets, gloss targets are absent
48 | if "gloss" in json_dict:
49 | json_dict["gloss_tensor"] = torch.tensor(
50 | [bos]
51 | + [
52 | self.vocab[word]
53 | if not freeze_vocab
54 | else self.vocab.get(word, unk)
55 | for word in json_dict["gloss"].split()
56 | ]
57 | + [eos]
58 | )
59 | if maxlen:
60 | json_dict["gloss_tensor"] = json_dict["gloss_tensor"][:maxlen]
61 | # in reverse dictionary test datasets, vector targets are absent
62 | for arch in SUPPORTED_ARCHS:
63 | if arch in json_dict:
64 | json_dict[f"{arch}_tensor"] = torch.tensor(json_dict[arch])
65 | if "electra" in json_dict:
66 | json_dict["electra_tensor"] = torch.tensor(json_dict["electra"])
67 | self.has_gloss = "gloss" in self.items[0]
68 | self.has_vecs = SUPPORTED_ARCHS[0] in self.items[0]
69 | self.has_electra = "electra" in self.items[0]
70 | self.itos = sorted(self.vocab, key=lambda w: self.vocab[w])
71 |
72 | def __len__(self):
73 | return len(self.items)
74 |
75 | def __getitem__(self, index):
76 | return self.items[index]
77 |
78 | # we're adding this method to simplify the code in our predictions of
79 | # glosses
80 | def decode(self, tensor):
81 | """Convert a sequence of indices (possibly batched) to tokens"""
82 | with torch.no_grad():
83 | if tensor.dim() == 2:
84 | # we have batched tensors of shape [Seq x Batch]
85 | decoded = []
86 | for tensor_ in tensor.t():
87 | decoded.append(self.decode(tensor_))
88 | return decoded
89 | else:
90 | return " ".join(
91 | [self.itos[i.item()] for i in tensor if i != self.vocab[PAD]]
92 | )
93 |
94 | def save(self, file):
95 | torch.save(self, file)
96 |
97 | @staticmethod
98 | def load(file):
99 | return torch.load(file)
100 |
101 |
102 | # A sampler allows you to define how to select items from your Dataset. Torch
103 | # provides a number of default Sampler classes
104 | class TokenSampler(Sampler):
105 | """Produce batches with up to `batch_size` tokens in each batch"""
106 |
107 | def __init__(
108 | self, dataset, batch_size=200, size_fn=len, drop_last=False, shuffle=True
109 | ):
110 | """
111 | args: `dataset` a torch.utils.data.Dataset (iterable style)
112 | `batch_size` the maximum number of tokens in a batch
113 | `size_fn` a callable that yields the number of tokens in a dataset item
114 |             `drop_last` if True and the data can't be divided into batches evenly, drop the last (incomplete) batch
115 | `shuffle` if True, shuffle between every iteration
116 | """
117 | self.dataset = dataset
118 | self.batch_size = batch_size
119 | self.size_fn = size_fn
120 | self._len = None
121 | self.drop_last = drop_last
122 |         self.shuffle = shuffle
123 |
124 | def __iter__(self):
125 | indices = range(len(self.dataset))
126 | if self.shuffle:
127 | indices = list(indices)
128 | random.shuffle(indices)
129 | i = 0
130 | selected = []
131 | numel = 0
132 | longest_len = 0
133 | for i in indices:
134 | if numel + self.size_fn(self.dataset[i]) > self.batch_size:
135 | if selected:
136 | yield selected
137 | selected = []
138 | numel = 0
139 | numel += self.size_fn(self.dataset[i])
140 | selected.append(i)
141 | if selected and not self.drop_last:
142 | yield selected
143 |
144 | def __len__(self):
145 | if self._len is None:
146 | self._len = (
147 | sum(self.size_fn(self.dataset[i]) for i in range(len(self.dataset)))
148 | // self.batch_size
149 | )
150 | return self._len
151 |
152 |
153 | # DataLoaders give access to an iterator over the dataset, using a sampling
154 | # strategy as defined through a Sampler.
155 | def get_dataloader(dataset, batch_size=200, shuffle=True):
156 | """produce dataloader.
157 | args: `dataset` a torch.utils.data.Dataset (iterable style)
158 | `batch_size` the maximum number of tokens in a batch
159 | `shuffle` if True, shuffle between every iteration
160 | """
161 | # some constants for the closures
162 | has_gloss = dataset.has_gloss
163 | has_vecs = dataset.has_vecs
164 | has_electra = dataset.has_electra
165 | PAD_idx = dataset.vocab[PAD]
166 |
167 | # the collate function has to convert a list of dataset items into a batch
168 | def do_collate(json_dicts):
169 |         """collates examples into a dict batch; produces and pads tensors"""
170 | batch = defaultdict(list)
171 | for jdict in json_dicts:
172 | for key in jdict:
173 | batch[key].append(jdict[key])
174 | if has_gloss:
175 | batch["gloss_tensor"] = pad_sequence(
176 | batch["gloss_tensor"], padding_value=PAD_idx, batch_first=False
177 | )
178 | if has_vecs:
179 | for arch in SUPPORTED_ARCHS:
180 | batch[f"{arch}_tensor"] = torch.stack(batch[f"{arch}_tensor"])
181 | if has_electra:
182 | batch["electra_tensor"] = torch.stack(batch["electra_tensor"])
183 | return dict(batch)
184 |
185 | if dataset.has_gloss:
186 | # we try to keep the amount of gloss tokens roughly constant across all
187 | # batches.
188 | def do_size_item(item):
189 | """retrieve tensor size, so as to batch items per elements"""
190 | return item["gloss_tensor"].numel()
191 |
192 | return DataLoader(
193 | dataset,
194 | collate_fn=do_collate,
195 | batch_sampler=TokenSampler(
196 | dataset, batch_size=batch_size, size_fn=do_size_item, shuffle=shuffle
197 | ),
198 | )
199 | else:
200 | # there's no gloss, hence no gloss tokens, so we use a default batching
201 | # strategy.
202 | return DataLoader(
203 | dataset, collate_fn=do_collate, batch_size=batch_size, shuffle=shuffle
204 | )
205 |
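# Minimal usage sketch (illustrative only; "data/en.train.json" is a
# placeholder for wherever you stored the CODWOE datasets):
#
#     dataset = JSONDataset("data/en.train.json")
#     dataloader = get_dataloader(dataset, batch_size=512)
#     batch = next(iter(dataloader))
#     print(batch["gloss_tensor"].shape)  # [gloss length x batch size]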
--------------------------------------------------------------------------------
/code/revdict.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import json
4 | import logging
5 | import pathlib
6 | import sys
7 |
8 | logger = logging.getLogger(pathlib.Path(__file__).name)
9 | logger.setLevel(logging.DEBUG)
10 | handler = logging.StreamHandler(sys.stdout)
11 | handler.setFormatter(
12 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
13 | )
14 | logger.addHandler(handler)
15 |
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | import torch.optim as optim
20 | from torch.utils.tensorboard import SummaryWriter
21 |
22 | import tqdm
23 |
24 | import data
25 | import models
26 |
27 |
28 | def get_parser(
29 | parser=argparse.ArgumentParser(
30 | description="Run a reverse dictionary baseline.\nThe task consists in reconstructing an embedding from the glosses listed in the datasets"
31 | ),
32 | ):
33 | parser.add_argument(
34 | "--do_train", action="store_true", help="whether to train a model from scratch"
35 | )
36 | parser.add_argument(
37 | "--do_pred", action="store_true", help="whether to produce predictions"
38 | )
39 | parser.add_argument(
40 | "--train_file", type=pathlib.Path, help="path to the train file"
41 | )
42 | parser.add_argument("--dev_file", type=pathlib.Path, help="path to the dev file")
43 | parser.add_argument("--test_file", type=pathlib.Path, help="path to the test file")
44 | parser.add_argument(
45 | "--device",
46 | type=torch.device,
47 | default=torch.device("cpu"),
48 | help="path to the train file",
49 | )
50 | parser.add_argument(
51 | "--target_arch",
52 | type=str,
53 | default="sgns",
54 | choices=("sgns", "char", "electra"),
55 | help="embedding architecture to use as target",
56 | )
57 | parser.add_argument(
58 | "--summary_logdir",
59 | type=pathlib.Path,
60 |         default=pathlib.Path("logs") / "revdict-baseline",
61 | help="write logs for future analysis",
62 | )
63 | parser.add_argument(
64 | "--save_dir",
65 | type=pathlib.Path,
66 |         default=pathlib.Path("models") / "revdict-baseline",
67 | help="where to save model & vocab",
68 | )
69 | parser.add_argument(
70 | "--pred_file",
71 | type=pathlib.Path,
72 | default=pathlib.Path("revdict-baseline-preds.json"),
73 | help="where to save predictions",
74 | )
75 | return parser
76 |
77 |
78 | def train(args):
79 | assert args.train_file is not None, "Missing dataset for training"
80 | # 1. get data, vocabulary, summary writer
81 | logger.debug("Preloading data")
82 | ## make datasets
83 | train_dataset = data.JSONDataset(args.train_file)
84 | if args.dev_file:
85 | dev_dataset = data.JSONDataset(args.dev_file, vocab=train_dataset.vocab)
86 | ## assert they correspond to the task
87 | assert train_dataset.has_gloss, "Training dataset contains no gloss."
88 | if args.target_arch == "electra":
89 |         assert train_dataset.has_electra, "Training dataset contains no vector."
90 |     else:
91 |         assert train_dataset.has_vecs, "Training dataset contains no vector."
92 | if args.dev_file:
93 | assert dev_dataset.has_gloss, "Development dataset contains no gloss."
94 | if args.target_arch == "electra":
95 | assert dev_dataset.has_electra, "Development dataset contains no vector."
96 | else:
97 | assert dev_dataset.has_vecs, "Development dataset contains no vector."
98 | ## make dataloader
99 | train_dataloader = data.get_dataloader(train_dataset, batch_size=512)
100 |     dev_dataloader = data.get_dataloader(dev_dataset, shuffle=False, batch_size=1024) if args.dev_file else None
101 | ## make summary writer
102 | summary_writer = SummaryWriter(args.summary_logdir)
103 | train_step = itertools.count() # to keep track of the training steps for logging
104 |
105 | # 2. construct model
106 | ## Hyperparams
107 | logger.debug("Setting up training environment")
108 |     model = models.RevdictModel(train_dataset.vocab).to(args.device)
109 | model.train()
110 |
111 | # 3. declare optimizer & criterion
112 | ## Hyperparams
113 | EPOCHS, LEARNING_RATE, BETA1, BETA2, WEIGHT_DECAY = 10, 1.0e-4, 0.9, 0.999, 1.0e-6
114 | optimizer = optim.AdamW(
115 | model.parameters(),
116 | lr=LEARNING_RATE,
117 | betas=(BETA1, BETA2),
118 | weight_decay=WEIGHT_DECAY,
119 | )
120 | criterion = nn.MSELoss()
121 |
122 | vec_tensor_key = f"{args.target_arch}_tensor"
123 |
124 | # 4. train model
125 | for epoch in tqdm.trange(EPOCHS, desc="Epochs"):
126 | ## train loop
127 | pbar = tqdm.tqdm(
128 | desc=f"Train {epoch}", total=len(train_dataset), disable=None, leave=False
129 | )
130 | for batch in train_dataloader:
131 | optimizer.zero_grad()
132 | gls = batch["gloss_tensor"].to(args.device)
133 | vec = batch[vec_tensor_key].to(args.device)
134 | pred = model(gls)
135 | loss = criterion(pred, vec)
136 | loss.backward()
137 | # keep track of the train loss for this step
138 | next_step = next(train_step)
139 | summary_writer.add_scalar(
140 | "revdict-train/cos",
141 | F.cosine_similarity(pred, vec).mean().item(),
142 | next_step,
143 | )
144 | summary_writer.add_scalar("revdict-train/mse", loss.item(), next_step)
145 | optimizer.step()
146 | pbar.update(vec.size(0))
147 | pbar.close()
148 | ## eval loop
149 | if args.dev_file:
150 | model.eval()
151 | with torch.no_grad():
152 | sum_dev_loss, sum_cosine = 0.0, 0.0
153 | pbar = tqdm.tqdm(
154 | desc=f"Eval {epoch}",
155 | total=len(dev_dataset),
156 | disable=None,
157 | leave=False,
158 | )
159 | for batch in dev_dataloader:
160 | gls = batch["gloss_tensor"].to(args.device)
161 | vec = batch[vec_tensor_key].to(args.device)
162 | pred = model(gls)
163 | sum_dev_loss += (
164 | F.mse_loss(pred, vec, reduction="none").mean(1).sum().item()
165 | )
166 | sum_cosine += F.cosine_similarity(pred, vec).sum().item()
167 | pbar.update(vec.size(0))
168 | # keep track of the average loss on dev set for this epoch
169 | summary_writer.add_scalar(
170 | "revdict-dev/cos", sum_cosine / len(dev_dataset), epoch
171 | )
172 | summary_writer.add_scalar(
173 | "revdict-dev/mse", sum_dev_loss / len(dev_dataset), epoch
174 | )
175 | pbar.close()
176 | model.train()
177 |
178 | # 5. save result
179 | model.save(args.save_dir / "model.pt")
180 | train_dataset.save(args.save_dir / "train_dataset.pt")
181 |     if args.dev_file: dev_dataset.save(args.save_dir / "dev_dataset.pt")
182 |
183 |
184 | def pred(args):
185 | assert args.test_file is not None, "Missing dataset for test"
186 | # 1. retrieve vocab, dataset, model
187 |     model = models.RevdictModel.load(args.save_dir / "model.pt")
188 | train_vocab = data.JSONDataset.load(args.save_dir / "train_dataset.pt").vocab
189 | test_dataset = data.JSONDataset(
190 | args.test_file, vocab=train_vocab, freeze_vocab=True, maxlen=model.maxlen
191 | )
192 | test_dataloader = data.get_dataloader(test_dataset, shuffle=False, batch_size=1024)
193 | model.eval()
194 | vec_tensor_key = f"{args.target_arch}_tensor"
195 | assert test_dataset.has_gloss, "File is not usable for the task"
196 | # 2. make predictions
197 | predictions = []
198 | with torch.no_grad():
199 | pbar = tqdm.tqdm(desc="Pred.", total=len(test_dataset))
200 | for batch in test_dataloader:
201 | vecs = model(batch["gloss_tensor"].to(args.device)).cpu()
202 | for id, vec in zip(batch["id"], vecs.unbind()):
203 | predictions.append(
204 | {"id": id, args.target_arch: vec.view(-1).cpu().tolist()}
205 | )
206 | pbar.update(vecs.size(0))
207 | pbar.close()
208 | with open(args.pred_file, "w") as ostr:
209 | json.dump(predictions, ostr)
210 |
211 |
212 | def main(args):
213 | if args.do_train:
214 | logger.debug("Performing revdict training")
215 | train(args)
216 | if args.do_pred:
217 | logger.debug("Performing revdict prediction")
218 | pred(args)
219 |
220 |
221 | if __name__ == "__main__":
222 | args = get_parser().parse_args()
223 | main(args)
224 |
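# Example invocation (illustrative; the file paths are placeholders for the
# CODWOE datasets you downloaded):
#
#     python3 revdict.py --do_train --do_pred \
#         --train_file data/en.train.json --dev_file data/en.dev.json \
#         --test_file data/en.test.revdict.json \
#         --target_arch sgns --device cuda:0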
--------------------------------------------------------------------------------
/code/defmod.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import json
4 | import logging
5 | import pathlib
6 | import sys
7 |
8 | logger = logging.getLogger(pathlib.Path(__file__).name)
9 | logger.setLevel(logging.DEBUG)
10 | handler = logging.StreamHandler(sys.stdout)
11 | handler.setFormatter(
12 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
13 | )
14 | logger.addHandler(handler)
15 |
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | import torch.optim as optim
20 | from torch.utils.tensorboard import SummaryWriter
21 |
22 | import tqdm
23 |
24 | import data
25 | import models
26 |
27 |
28 | def get_parser(
29 | parser=argparse.ArgumentParser(description="run a definition modeling baseline"),
30 | ):
31 | parser.add_argument(
32 | "--do_train", action="store_true", help="whether to train a model from scratch"
33 | )
34 | parser.add_argument(
35 | "--do_pred", action="store_true", help="whether to produce predictions"
36 | )
37 | parser.add_argument(
38 | "--train_file", type=pathlib.Path, help="path to the train file"
39 | )
40 | parser.add_argument("--dev_file", type=pathlib.Path, help="path to the dev file")
41 | parser.add_argument("--test_file", type=pathlib.Path, help="path to the test file")
42 | parser.add_argument(
43 | "--device",
44 | type=torch.device,
45 | default=torch.device("cpu"),
46 |         help="device on which to run computations (e.g. cpu or cuda:0)",
47 | )
48 | parser.add_argument(
49 | "--source_arch",
50 | type=str,
51 | default="sgns",
52 | choices=("sgns", "char", "electra"),
53 | help="embedding architecture to use as source",
54 | )
55 | parser.add_argument(
56 | "--summary_logdir",
57 | type=pathlib.Path,
58 | default=pathlib.Path("logs") / "defmod-baseline",
59 | help="write logs for future analysis",
60 | )
61 | parser.add_argument(
62 | "--save_dir",
63 | type=pathlib.Path,
64 | default=pathlib.Path("models") / "defmod-baseline",
65 | help="where to save model & vocab",
66 | )
67 | parser.add_argument(
68 | "--pred_file",
69 | type=pathlib.Path,
70 | default=pathlib.Path("defmod-baseline-preds.json"),
71 | help="where to save predictions",
72 | )
73 | return parser
74 |
75 |
76 | def train(args):
77 | assert args.train_file is not None, "Missing dataset for training"
78 | # 1. get data, vocabulary, summary writer
79 | logger.debug("Preloading training data")
80 | ## make datasets
81 | train_dataset = data.JSONDataset(args.train_file)
82 | if args.dev_file:
83 | dev_dataset = data.JSONDataset(args.dev_file, vocab=train_dataset.vocab)
84 | ## assert they correspond to the task
85 | assert train_dataset.has_gloss, "Training dataset contains no gloss."
86 | if args.source_arch == "electra":
87 |         assert train_dataset.has_electra, "Training dataset contains no vector."
88 |     else:
89 |         assert train_dataset.has_vecs, "Training dataset contains no vector."
90 | if args.dev_file:
91 | assert dev_dataset.has_gloss, "Development dataset contains no gloss."
92 | if args.source_arch == "electra":
93 | assert dev_dataset.has_electra, "Development dataset contains no vector."
94 | else:
95 | assert dev_dataset.has_vecs, "Development dataset contains no vector."
96 | ## make dataloader
97 | train_dataloader = data.get_dataloader(train_dataset)
98 |     dev_dataloader = data.get_dataloader(dev_dataset, shuffle=False) if args.dev_file else None
99 | ## make summary writer
100 | summary_writer = SummaryWriter(args.summary_logdir)
101 | train_step = itertools.count() # to keep track of the training steps for logging
102 |
103 | # 2. construct model
104 | logger.debug("Setting up training environment")
105 |
106 |     model = models.DefmodModel(train_dataset.vocab).to(args.device)
107 | model.train()
108 |
109 | # 3. declare optimizer & criterion
110 | ## Hyperparams
111 | EPOCHS, LEARNING_RATE, BETA1, BETA2, WEIGHT_DECAY = 10, 1.0e-4, 0.9, 0.999, 1.0e-6
112 | optimizer = optim.AdamW(
113 | model.parameters(),
114 | lr=LEARNING_RATE,
115 | betas=(BETA1, BETA2),
116 | weight_decay=WEIGHT_DECAY,
117 | )
118 | criterion = nn.CrossEntropyLoss(ignore_index=model.padding_idx)
119 |
120 | vec_tensor_key = f"{args.source_arch}_tensor"
121 |
122 | # 4. train model
123 | for epoch in tqdm.trange(EPOCHS, desc="Epochs"):
124 | ## train loop
125 | pbar = tqdm.tqdm(
126 | desc=f"Train {epoch}", total=len(train_dataset), disable=None, leave=False
127 | )
128 | for batch in train_dataloader:
129 | optimizer.zero_grad()
130 | vec = batch[vec_tensor_key].to(args.device)
131 | gls = batch["gloss_tensor"].to(args.device)
132 | pred = model(vec, gls[:-1])
133 | loss = criterion(pred.view(-1, pred.size(-1)), gls.view(-1))
134 | loss.backward()
135 | # keep track of the train loss for this step
136 | tokens = gls != model.padding_idx
137 | acc = (
138 | ((pred.argmax(-1) == gls) & tokens).float().sum() / tokens.sum()
139 | ).item()
140 | step = next(train_step)
141 | summary_writer.add_scalar("defmod-train/xent", loss.item(), step)
142 | summary_writer.add_scalar("defmod-train/acc", acc, step)
143 | optimizer.step()
144 | pbar.update(vec.size(0))
145 | pbar.close()
146 | ## eval loop
147 | if args.dev_file:
148 | model.eval()
149 | with torch.no_grad():
150 | sum_dev_loss = 0.0
151 | sum_acc = 0
152 | ntoks = 0
153 | pbar = tqdm.tqdm(
154 | desc=f"Eval {epoch}",
155 | total=len(dev_dataset),
156 | disable=None,
157 | leave=False,
158 | )
159 | for batch in dev_dataloader:
160 | vec = batch[vec_tensor_key].to(args.device)
161 | gls = batch["gloss_tensor"].to(args.device)
162 | pred = model(vec, gls[:-1])
163 | sum_dev_loss += F.cross_entropy(
164 | pred.view(-1, pred.size(-1)),
165 | gls.view(-1),
166 | reduction="sum",
167 | ignore_index=model.padding_idx,
168 | ).item()
169 | tokens = gls != model.padding_idx
170 | ntoks += tokens.sum().item()
171 | sum_acc += ((pred.argmax(-1) == gls) & tokens).sum().item()
172 | pbar.update(vec.size(0))
173 |
174 | # keep track of the average loss & acc on dev set for this epoch
175 | summary_writer.add_scalar(
176 | "defmod-dev/xent", sum_dev_loss / ntoks, epoch
177 | )
178 | summary_writer.add_scalar("defmod-dev/acc", sum_acc / ntoks, epoch)
179 | pbar.close()
180 | model.train()
181 |
182 | # 5. save result
183 | model.save(args.save_dir / "model.pt")
184 | train_dataset.save(args.save_dir / "train_dataset.pt")
185 |     if args.dev_file: dev_dataset.save(args.save_dir / "dev_dataset.pt")
186 |
187 |
188 | def pred(args):
189 | assert args.test_file is not None, "Missing dataset for test"
190 | # 1. retrieve vocab, dataset, model
191 | model = models.DefmodModel.load(args.save_dir / "model.pt")
192 | train_vocab = data.JSONDataset.load(args.save_dir / "train_dataset.pt").vocab
193 | test_dataset = data.JSONDataset(
194 | args.test_file, vocab=train_vocab, freeze_vocab=True, maxlen=model.maxlen
195 | )
196 | test_dataloader = data.get_dataloader(test_dataset, shuffle=False)
197 | model.eval()
198 | vec_tensor_key = f"{args.source_arch}_tensor"
199 | if args.source_arch == "electra":
200 | assert test_dataset.has_electra, "File is not usable for the task"
201 | else:
202 | assert test_dataset.has_vecs, "File is not usable for the task"
203 | # 2. make predictions
204 | predictions = []
205 | with torch.no_grad():
206 | pbar = tqdm.tqdm(desc="Pred.", total=len(test_dataset), disable=None)
207 | for batch in test_dataloader:
208 | sequence = model.pred(batch[vec_tensor_key].to(args.device))
209 | for id, gloss in zip(batch["id"], test_dataset.decode(sequence)):
210 | predictions.append({"id": id, "gloss": gloss})
211 | pbar.update(batch[vec_tensor_key].size(0))
212 | pbar.close()
213 | # 3. dump predictions
214 |     with open(args.pred_file, "w") as ostr:
215 | json.dump(predictions, ostr)
216 |
217 |
218 | def main(args):
219 | if args.do_train:
220 | logger.debug("Performing defmod training")
221 | train(args)
222 | if args.do_pred:
223 | logger.debug("Performing defmod prediction")
224 | pred(args)
225 |
226 |
227 | if __name__ == "__main__":
228 | args = get_parser().parse_args()
229 | main(args)
230 |
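# Example invocation (illustrative; paths are placeholders). Note that
# --source_arch selects which embedding the glosses are generated from:
#
#     python3 defmod.py --do_train --do_pred \
#         --train_file data/en.train.json --dev_file data/en.dev.json \
#         --test_file data/en.test.defmod.json \
#         --source_arch electra --pred_file defmod-electra-en.json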
--------------------------------------------------------------------------------
/baseline_archs/code/score.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import collections
3 | import itertools
4 | import json
5 | import logging
6 | import os
7 | import pathlib
8 | import sys
9 |
10 | logger = logging.getLogger(pathlib.Path(__file__).name)
11 | logger.setLevel(logging.DEBUG)
12 | handler = logging.StreamHandler(sys.stdout)
13 | handler.setFormatter(
14 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
15 | )
16 | logger.addHandler(handler)
17 |
18 | os.environ["MOVERSCORE_MODEL"] = "distilbert-base-multilingual-cased"
19 | import moverscore_v2 as mv_sc
20 |
21 | from nltk.translate.bleu_score import sentence_bleu as bleu
22 | from nltk import word_tokenize as tokenize
23 |
24 | import numpy as np
25 |
26 | import torch
27 | import torch.nn.functional as F
28 |
29 | import tqdm
30 |
31 | import check_output
32 |
33 |
34 | def get_parser(parser=argparse.ArgumentParser(description="score a submission")):
35 | parser.add_argument(
36 | "submission_path",
37 | type=pathlib.Path,
38 | help="path to submission file to be scored, or to a directory of submissions to be scored",
39 | )
40 | parser.add_argument(
41 | "--reference_files_dir",
42 | type=pathlib.Path,
43 | help="directory containing all reference files",
44 | default=pathlib.Path("data"),
45 | )
46 | parser.add_argument(
47 | "--output_file",
48 | type=pathlib.Path,
49 | help="default path to print output",
50 | default=pathlib.Path("scores.txt"),
51 | )
52 | return parser
53 |
54 |
55 | def mover_corpus_score(sys_stream, ref_streams, trace=0):
56 | """Adapted from the MoverScore github"""
57 |
58 | if isinstance(sys_stream, str):
59 | sys_stream = [sys_stream]
60 | if isinstance(ref_streams, str):
61 | ref_streams = [[ref_streams]]
62 | fhs = [sys_stream] + ref_streams
63 | corpus_score = 0
64 | pbar = tqdm.tqdm(desc="MvSc.", disable=None, total=len(sys_stream))
65 | for lines in itertools.zip_longest(*fhs):
66 | if None in lines:
67 | raise EOFError("Source and reference streams have different lengths!")
68 | hypo, *refs = lines
69 | idf_dict_hyp = collections.defaultdict(lambda: 1.0)
70 | idf_dict_ref = collections.defaultdict(lambda: 1.0)
71 | corpus_score += mv_sc.word_mover_score(
72 | refs,
73 | [hypo],
74 | idf_dict_ref,
75 | idf_dict_hyp,
76 | stop_words=[],
77 | n_gram=1,
78 | remove_subwords=False,
79 | )[0]
80 | pbar.update()
81 | pbar.close()
82 | corpus_score /= len(sys_stream)
83 | return corpus_score
84 |
85 |
86 | def eval_defmod(args, summary):
87 | # 1. read contents
88 | ## define accumulators for lemma-level BLEU and MoverScore
89 | reference_lemma_groups = collections.defaultdict(list)
90 | all_preds, all_tgts = [], []
91 | ## reading data files
92 | with open(args.submission_file, "r") as fp:
93 | submission = sorted(json.load(fp), key=lambda r: r["id"])
94 | with open(args.reference_file, "r") as fp:
95 | reference = sorted(json.load(fp), key=lambda r: r["id"])
96 |
97 | # 2. compute scores
98 | ## compute sense-level BLEU
99 | assert len(submission) == len(reference), "Missing items in submission!"
100 | id_to_lemma = {}
101 | pbar = tqdm.tqdm(total=len(submission), desc="S-BLEU", disable=None)
102 | for sub, ref in zip(submission, reference):
103 | assert sub["id"] == ref["id"], "Mismatch in submission and reference files!"
104 | all_preds.append(sub["gloss"])
105 | all_tgts.append(ref["gloss"])
106 | sub["gloss"] = tokenize(sub["gloss"])
107 | ref["gloss"] = tokenize(ref["gloss"])
108 |         sub["sense-BLEU"] = bleu([ref["gloss"]], sub["gloss"])
109 | reference_lemma_groups[(ref["word"], ref["pos"])].append(ref["gloss"])
110 | id_to_lemma[sub["id"]] = (ref["word"], ref["pos"])
111 | pbar.update()
112 | pbar.close()
113 | ## compute lemma-level BLEU
114 | for sub in tqdm.tqdm(submission, desc="L-BLEU", disable=None):
115 | sub["lemma-BLEU"] = max(
116 |             bleu([g], sub["gloss"])
117 | for g in reference_lemma_groups[id_to_lemma[sub["id"]]]
118 | )
119 | lemma_bleu_average = sum(s["lemma-BLEU"] for s in submission) / len(submission)
120 | sense_bleu_average = sum(s["sense-BLEU"] for s in submission) / len(submission)
121 | ## compute MoverScore
122 | # moverscore_average = np.mean(mv_sc.word_mover_score(
123 | # all_tgts,
124 | # all_preds,
125 | # collections.defaultdict(lambda:1.),
126 | # collections.defaultdict(lambda:1.),
127 | # stop_words=[],
128 | # n_gram=1,
129 | # remove_subwords=False,
130 | # batch_size=1,
131 | # ))
132 | moverscore_average = mover_corpus_score(all_preds, [all_tgts])
133 | # 3. write results.
134 | # logger.debug(f"Submission {args.submission_file}, \n\tMvSc.: " + \
135 | # f"{moverscore_average}\n\tL-BLEU: {lemma_bleu_average}\n\tS-BLEU: " + \
136 | # f"{sense_bleu_average}"
137 | # )
138 | with open(args.output_file, "a") as ostr:
139 | print(f"MoverScore_{summary.lang}:{moverscore_average}", file=ostr)
140 | print(f"BLEU_lemma_{summary.lang}:{lemma_bleu_average}", file=ostr)
141 | print(f"BLEU_sense_{summary.lang}:{sense_bleu_average}", file=ostr)
142 | return (
143 | args.submission_file,
144 | moverscore_average,
145 | lemma_bleu_average,
146 | sense_bleu_average,
147 | )
148 |
149 |
150 | def rank_cosine(preds, targets):
151 | unique_targets = targets.unique(dim=0)
152 | all_assocs = preds @ F.normalize(targets).T
153 | unique_assocs = preds @ F.normalize(unique_targets).T
154 | refs = torch.diagonal(all_assocs, 0).unsqueeze(1)
155 | ranks = (unique_assocs >= refs).sum(1).float().mean().item()
156 | return ranks / unique_targets.size(0)
157 |
158 |
159 | def eval_revdict(args, summary):
160 | # 1. read contents
161 | ## read data files
162 | with open(args.submission_file, "r") as fp:
163 | submission = sorted(json.load(fp), key=lambda r: r["id"])
164 | with open(args.reference_file, "r") as fp:
165 | reference = sorted(json.load(fp), key=lambda r: r["id"])
166 | vec_archs = sorted(
167 | set(submission[0].keys())
168 | - {
169 | "id",
170 | "gloss",
171 | "word",
172 | "pos",
173 | "concrete",
174 | "example",
175 | "f_rnk",
176 | "counts",
177 | "polysemous",
178 | }
179 | )
180 | ## define accumulators for rank-cosine
181 | all_preds = collections.defaultdict(list)
182 | all_refs = collections.defaultdict(list)
183 |
184 | assert len(submission) == len(reference), "Missing items in submission!"
185 | ## retrieve vectors
186 | for sub, ref in zip(submission, reference):
187 | assert sub["id"] == ref["id"], "Mismatch in submission and reference files!"
188 | for arch in vec_archs:
189 | all_preds[arch].append(sub[arch])
190 | all_refs[arch].append(ref[arch])
191 |
192 | torch.autograd.set_grad_enabled(False)
193 | all_preds = {arch: torch.tensor(all_preds[arch]) for arch in vec_archs}
194 | all_refs = {arch: torch.tensor(all_refs[arch]) for arch in vec_archs}
195 |
196 | # 2. compute scores
197 | MSE_scores = {
198 | arch: F.mse_loss(all_preds[arch], all_refs[arch]).item() for arch in vec_archs
199 | }
200 | cos_scores = {
201 | arch: F.cosine_similarity(all_preds[arch], all_refs[arch]).mean().item()
202 | for arch in vec_archs
203 | }
204 | rnk_scores = {
205 | arch: rank_cosine(all_preds[arch], all_refs[arch]) for arch in vec_archs
206 | }
207 | # 3. display results
208 | # logger.debug(f"Submission {args.submission_file}, \n\tMSE: " + \
209 | # ", ".join(f"{a}={MSE_scores[a]}" for a in vec_archs) + \
210 | # ", \n\tcosine: " + \
211 | # ", ".join(f"{a}={cos_scores[a]}" for a in vec_archs) + \
212 | # ", \n\tcosine ranks: " + \
213 | # ", ".join(f"{a}={rnk_scores[a]}" for a in vec_archs) + \
214 | # "."
215 | # )
216 | # all_archs = sorted(set(reference[0].keys()) - {"id", "gloss", "word", "pos"})
217 | with open(args.output_file, "a") as ostr:
218 | for arch in vec_archs:
219 | print(f"MSE_{summary.lang}_{arch}:{MSE_scores[arch]}", file=ostr)
220 | print(f"cos_{summary.lang}_{arch}:{cos_scores[arch]}", file=ostr)
221 | print(f"rnk_{summary.lang}_{arch}:{rnk_scores[arch]}", file=ostr)
222 | return (
223 | args.submission_file,
224 | *[MSE_scores.get(a, None) for a in vec_archs],
225 | *[cos_scores.get(a, None) for a in vec_archs],
226 | )
227 |
228 |
229 | def main(args):
230 | def do_score(submission_file, summary):
231 | args.submission_file = submission_file
232 | args.reference_file = (
233 | args.reference_files_dir
234 | / f"{summary.lang}.test.{summary.track}.complete.json"
235 | )
236 | eval_func = eval_revdict if summary.track == "revdict" else eval_defmod
237 | eval_func(args, summary)
238 |
239 | if args.output_file.is_dir():
240 | args.output_file = args.output_file / "scores.txt"
241 | # wipe file if exists
242 | open(args.output_file, "w").close()
243 | if args.submission_path.is_dir():
244 | files = list(args.submission_path.glob("*.json"))
245 | assert len(files) >= 1, "No data to score!"
246 | summaries = [check_output.main(f) for f in files]
247 | assert len(set(summaries)) == len(files), "Ensure files map to unique setups."
248 | rd_cfg = [
249 | (s.lang, a) for s in summaries if s.track == "revdict" for a in s.vec_archs
250 | ]
251 | assert len(set(rd_cfg)) == len(rd_cfg), "Ensure files map to unique setups."
252 | for summary, submitted_file in zip(summaries, files):
253 | do_score(submitted_file, summary)
254 | else:
255 | summary = check_output.main(args.submission_path)
256 | do_score(args.submission_path, summary)
257 |
258 |
259 | if __name__ == "__main__":
260 | main(get_parser().parse_args())
261 |
--------------------------------------------------------------------------------
/code/score.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import collections
3 | import itertools
4 | import json
5 | import logging
6 | import os
7 | import pathlib
8 | import sys
9 |
10 | logger = logging.getLogger(pathlib.Path(__file__).name)
11 | logger.setLevel(logging.DEBUG)
12 | handler = logging.StreamHandler(sys.stdout)
13 | handler.setFormatter(
14 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
15 | )
16 | logger.addHandler(handler)
17 |
18 | os.environ["MOVERSCORE_MODEL"] = "distilbert-base-multilingual-cased"
19 | import moverscore_v2 as mv_sc
20 |
21 | from nltk.translate.bleu_score import sentence_bleu
22 | from nltk.translate.bleu_score import SmoothingFunction
23 | from nltk import word_tokenize as tokenize
24 |
25 | import numpy as np
26 |
27 | import torch
28 | import torch.nn.functional as F
29 |
30 | import tqdm
31 |
32 | import check_output
33 |
34 |
35 | def get_parser(parser=argparse.ArgumentParser(description="score a submission")):
36 | parser.add_argument(
37 | "submission_path",
38 | type=pathlib.Path,
39 | help="path to submission file to be scored, or to a directory of submissions to be scored",
40 | )
41 | parser.add_argument(
42 | "--reference_files_dir",
43 | type=pathlib.Path,
44 | help="directory containing all reference files",
45 | default=pathlib.Path("data"),
46 | )
47 | parser.add_argument(
48 | "--output_file",
49 | type=pathlib.Path,
50 | help="default path to print output",
51 | default=pathlib.Path("scores.txt"),
52 | )
53 | return parser
54 |
55 | def bleu(pred, target, smoothing_function=SmoothingFunction().method4):
56 |     return sentence_bleu([target], pred, smoothing_function=smoothing_function)
57 |
58 |
59 | def mover_corpus_score(sys_stream, ref_streams, trace=0):
60 | """Adapted from the MoverScore github"""
61 |
62 | if isinstance(sys_stream, str):
63 | sys_stream = [sys_stream]
64 | if isinstance(ref_streams, str):
65 | ref_streams = [[ref_streams]]
66 | fhs = [sys_stream] + ref_streams
67 | corpus_score = 0
68 | pbar = tqdm.tqdm(desc="MvSc.", disable=None, total=len(sys_stream))
69 | for lines in itertools.zip_longest(*fhs):
70 | if None in lines:
71 | raise EOFError("Source and reference streams have different lengths!")
72 | hypo, *refs = lines
73 | idf_dict_hyp = collections.defaultdict(lambda: 1.0)
74 | idf_dict_ref = collections.defaultdict(lambda: 1.0)
75 | corpus_score += mv_sc.word_mover_score(
76 | refs,
77 | [hypo],
78 | idf_dict_ref,
79 | idf_dict_hyp,
80 | stop_words=[],
81 | n_gram=1,
82 | remove_subwords=False,
83 | )[0]
84 | pbar.update()
85 | pbar.close()
86 | corpus_score /= len(sys_stream)
87 | return corpus_score
88 |
89 |
90 | def eval_defmod(args, summary):
91 | # 1. read contents
92 | ## define accumulators for lemma-level BLEU and MoverScore
93 | reference_lemma_groups = collections.defaultdict(list)
94 | all_preds, all_tgts = [], []
95 | ## reading data files
96 | with open(args.submission_file, "r") as fp:
97 | submission = sorted(json.load(fp), key=lambda r: r["id"])
98 | with open(args.reference_file, "r") as fp:
99 | reference = sorted(json.load(fp), key=lambda r: r["id"])
100 |
101 | # 2. compute scores
102 | ## compute sense-level BLEU
103 | assert len(submission) == len(reference), "Missing items in submission!"
104 | id_to_lemma = {}
105 | pbar = tqdm.tqdm(total=len(submission), desc="S-BLEU", disable=None)
106 | for sub, ref in zip(submission, reference):
107 | assert sub["id"] == ref["id"], "Mismatch in submission and reference files!"
108 | all_preds.append(sub["gloss"])
109 | all_tgts.append(ref["gloss"])
110 | sub["gloss"] = tokenize(sub["gloss"])
111 | ref["gloss"] = tokenize(ref["gloss"])
112 | sub["sense-BLEU"] = bleu(sub["gloss"], ref["gloss"])
113 | reference_lemma_groups[(ref["word"], ref["pos"])].append(ref["gloss"])
114 | id_to_lemma[sub["id"]] = (ref["word"], ref["pos"])
115 | pbar.update()
116 | pbar.close()
117 | ## compute lemma-level BLEU
118 | for sub in tqdm.tqdm(submission, desc="L-BLEU", disable=None):
119 | sub["lemma-BLEU"] = max(
120 | bleu(sub["gloss"], g)
121 | for g in reference_lemma_groups[id_to_lemma[sub["id"]]]
122 | )
123 | lemma_bleu_average = sum(s["lemma-BLEU"] for s in submission) / len(submission)
124 | sense_bleu_average = sum(s["sense-BLEU"] for s in submission) / len(submission)
125 | ## compute MoverScore
126 | # moverscore_average = np.mean(mv_sc.word_mover_score(
127 | # all_tgts,
128 | # all_preds,
129 | # collections.defaultdict(lambda:1.),
130 | # collections.defaultdict(lambda:1.),
131 | # stop_words=[],
132 | # n_gram=1,
133 | # remove_subwords=False,
134 | # batch_size=1,
135 | # ))
136 | moverscore_average = mover_corpus_score(all_preds, [all_tgts])
137 | # 3. write results.
138 | # logger.debug(f"Submission {args.submission_file}, \n\tMvSc.: " + \
139 | # f"{moverscore_average}\n\tL-BLEU: {lemma_bleu_average}\n\tS-BLEU: " + \
140 | # f"{sense_bleu_average}"
141 | # )
142 | with open(args.output_file, "a") as ostr:
143 | print(f"MoverScore_{summary.lang}:{moverscore_average}", file=ostr)
144 | print(f"BLEU_lemma_{summary.lang}:{lemma_bleu_average}", file=ostr)
145 | print(f"BLEU_sense_{summary.lang}:{sense_bleu_average}", file=ostr)
146 | return (
147 | args.submission_file,
148 | moverscore_average,
149 | lemma_bleu_average,
150 | sense_bleu_average,
151 | )
152 |
153 |
154 | def rank_cosine(preds, targets):
155 | assocs = F.normalize(preds) @ F.normalize(targets).T
156 | refs = torch.diagonal(assocs, 0).unsqueeze(1)
157 | ranks = (assocs >= refs).sum(1).float()
158 | assert ranks.numel() == preds.size(0)
159 | ranks = ranks.mean().item()
160 | return ranks / preds.size(0)
161 |
162 |
163 | def eval_revdict(args, summary):
164 | # 1. read contents
165 | ## read data files
166 | with open(args.submission_file, "r") as fp:
167 | submission = sorted(json.load(fp), key=lambda r: r["id"])
168 | with open(args.reference_file, "r") as fp:
169 | reference = sorted(json.load(fp), key=lambda r: r["id"])
170 | vec_archs = sorted(
171 | set(submission[0].keys())
172 | - {
173 | "id",
174 | "gloss",
175 | "word",
176 | "pos",
177 | "concrete",
178 | "example",
179 | "f_rnk",
180 | "counts",
181 | "polysemous",
182 | }
183 | )
184 | ## define accumulators for rank-cosine
185 | all_preds = collections.defaultdict(list)
186 | all_refs = collections.defaultdict(list)
187 |
188 | assert len(submission) == len(reference), "Missing items in submission!"
189 | ## retrieve vectors
190 | for sub, ref in zip(submission, reference):
191 | assert sub["id"] == ref["id"], "Mismatch in submission and reference files!"
192 | for arch in vec_archs:
193 | all_preds[arch].append(sub[arch])
194 | all_refs[arch].append(ref[arch])
195 |
196 | torch.autograd.set_grad_enabled(False)
197 | all_preds = {arch: torch.tensor(all_preds[arch]) for arch in vec_archs}
198 | all_refs = {arch: torch.tensor(all_refs[arch]) for arch in vec_archs}
199 |
200 | # 2. compute scores
201 | MSE_scores = {
202 | arch: F.mse_loss(all_preds[arch], all_refs[arch]).item() for arch in vec_archs
203 | }
204 | cos_scores = {
205 | arch: F.cosine_similarity(all_preds[arch], all_refs[arch]).mean().item()
206 | for arch in vec_archs
207 | }
208 | rnk_scores = {
209 | arch: rank_cosine(all_preds[arch], all_refs[arch]) for arch in vec_archs
210 | }
211 | # 3. display results
212 | # logger.debug(f"Submission {args.submission_file}, \n\tMSE: " + \
213 | # ", ".join(f"{a}={MSE_scores[a]}" for a in vec_archs) + \
214 | # ", \n\tcosine: " + \
215 | # ", ".join(f"{a}={cos_scores[a]}" for a in vec_archs) + \
216 | # ", \n\tcosine ranks: " + \
217 | # ", ".join(f"{a}={rnk_scores[a]}" for a in vec_archs) + \
218 | # "."
219 | # )
220 | # all_archs = sorted(set(reference[0].keys()) - {"id", "gloss", "word", "pos"})
221 | with open(args.output_file, "a") as ostr:
222 | for arch in vec_archs:
223 | print(f"MSE_{summary.lang}_{arch}:{MSE_scores[arch]}", file=ostr)
224 | print(f"cos_{summary.lang}_{arch}:{cos_scores[arch]}", file=ostr)
225 | print(f"rnk_{summary.lang}_{arch}:{rnk_scores[arch]}", file=ostr)
226 | return (
227 | args.submission_file,
228 | *[MSE_scores.get(a, None) for a in vec_archs],
229 | *[cos_scores.get(a, None) for a in vec_archs],
230 | )
231 |
232 |
233 | def main(args):
234 | def do_score(submission_file, summary):
235 | args.submission_file = submission_file
236 | args.reference_file = (
237 | args.reference_files_dir
238 | / f"{summary.lang}.test.{summary.track}.complete.json"
239 | )
240 | eval_func = eval_revdict if summary.track == "revdict" else eval_defmod
241 | eval_func(args, summary)
242 |
243 | if args.output_file.is_dir():
244 | args.output_file = args.output_file / "scores.txt"
245 | # wipe file if exists
246 | open(args.output_file, "w").close()
247 | if args.submission_path.is_dir():
248 | files = list(args.submission_path.glob("*.json"))
249 | assert len(files) >= 1, "No data to score!"
250 | summaries = [check_output.main(f) for f in files]
251 | assert len(set(summaries)) == len(files), "Ensure files map to unique setups."
252 | rd_cfg = [
253 | (s.lang, a) for s in summaries if s.track == "revdict" for a in s.vec_archs
254 | ]
255 | assert len(set(rd_cfg)) == len(rd_cfg), "Ensure files map to unique setups."
256 | for summary, submitted_file in zip(summaries, files):
257 | do_score(submitted_file, summary)
258 | else:
259 | summary = check_output.main(args.submission_path)
260 | do_score(args.submission_path, summary)
261 |
262 |
263 | if __name__ == "__main__":
264 | main(get_parser().parse_args())
265 |
--------------------------------------------------------------------------------
/codalab/competition/competition.yaml:
--------------------------------------------------------------------------------
1 | title: CODWOE - Comparing Dictionaries and Word Embeddings
2 | description: SemEval 2022 Task 1 - Are dictionary glosses and word embedding representations semantically equivalent? Can we generate one from the other?
3 | image: codwoe-logo.png
4 | has_registration: True
5 | allow_teams: True
6 | competition_docker_image: linguistickus/codwoe
7 | html:
8 | overview: overview.html
9 | evaluation: evaluation.html
10 | terms: terms_and_conditions.html
11 | data: data.html
12 | phases:
13 | 1:
14 | phasenumber: 1
15 | label: "Evaluation"
16 | color: blue
17 | start_date: 2022-01-10
18 | max_submissions: 50
19 | scoring_program: scoring_program.zip
20 | reference_data: reference_data.zip
21 | leaderboard_management_mode: hide_results
22 | 2:
23 | phasenumber: 2
24 | label: "Post-Evaluation"
25 | color: purple
26 | start_date: 2022-02-01
27 | phase_never_ends: True
28 | max_submissions: 999
29 | scoring_program: scoring_program.zip
30 | reference_data: reference_data.zip
31 | auto_migration: True
32 | leaderboard_management_mode: default
33 | leaderboard:
34 | leaderboards:
35 | DEFMOD: &DEFMOD
36 | label: Definition Modeling
37 | rank: 1
38 | REVDICT_SGNS: &REVDICT_SGNS
39 | label: Reverse Dictionary (SGNS)
40 | rank: 2
41 | REVDICT_ELECTRA: &REVDICT_ELECTRA
42 | label: Reverse Dictionary (ELECTRA)
43 | rank: 3
44 | REVDICT_CHAR: &REVDICT_CHAR
45 | label: Reverse Dictionary (Character Embeddings)
46 | rank: 4
47 | column_groups:
48 | English: &EN
49 | label: English
50 | Spanish: &ES
51 | label: Spanish
52 | French: &FR
53 | label: French
54 | Italian: &IT
55 | label: Italian
56 | Russian: &RU
57 | label: Russian
58 | columns:
59 | MoverScore_en:
60 | label: MvSc.
61 | leaderboard: *DEFMOD
62 | column_group: *EN
63 | rank: 1
64 | numeric_format: 3
65 | BLEU_sense_en:
66 | label: S-BLEU
67 | leaderboard: *DEFMOD
68 | column_group: *EN
69 | rank: 2
70 | numeric_format: 3
71 | BLEU_lemma_en:
72 | label: L-BLEU
73 | leaderboard: *DEFMOD
74 | column_group: *EN
75 | rank: 3
76 | numeric_format: 3
77 |
78 | MoverScore_es:
79 | label: MvSc.
80 | leaderboard: *DEFMOD
81 | column_group: *ES
82 | rank: 1
83 | numeric_format: 3
84 | BLEU_sense_es:
85 | label: S-BLEU
86 | leaderboard: *DEFMOD
87 | column_group: *ES
88 | rank: 2
89 | numeric_format: 3
90 | BLEU_lemma_es:
91 | label: L-BLEU
92 | leaderboard: *DEFMOD
93 | column_group: *ES
94 | rank: 3
95 | numeric_format: 3
96 | MoverScore_fr:
97 | label: MvSc.
98 | leaderboard: *DEFMOD
99 | column_group: *FR
100 | rank: 1
101 | numeric_format: 3
102 | BLEU_sense_fr:
103 | label: S-BLEU
104 | leaderboard: *DEFMOD
105 | column_group: *FR
106 | rank: 2
107 | numeric_format: 3
108 | BLEU_lemma_fr:
109 | label: L-BLEU
110 | leaderboard: *DEFMOD
111 | column_group: *FR
112 | rank: 3
113 | numeric_format: 3
114 | MoverScore_it:
115 | label: MvSc.
116 | leaderboard: *DEFMOD
117 | column_group: *IT
118 | rank: 1
119 | numeric_format: 3
120 | BLEU_sense_it:
121 | label: S-BLEU
122 | leaderboard: *DEFMOD
123 | column_group: *IT
124 | rank: 2
125 | numeric_format: 3
126 | BLEU_lemma_it:
127 | label: L-BLEU
128 | leaderboard: *DEFMOD
129 | column_group: *IT
130 | rank: 3
131 | numeric_format: 3
132 | MoverScore_ru:
133 | label: MvSc.
134 | leaderboard: *DEFMOD
135 | column_group: *RU
136 | rank: 1
137 | numeric_format: 3
138 | BLEU_sense_ru:
139 | label: S-BLEU
140 | leaderboard: *DEFMOD
141 | column_group: *RU
142 | rank: 2
143 | numeric_format: 3
144 | BLEU_lemma_ru:
145 | label: L-BLEU
146 | leaderboard: *DEFMOD
147 | column_group: *RU
148 | rank: 3
149 | numeric_format: 3
150 | MSE_en_sgns:
151 | label: MSE
152 | leaderboard: *REVDICT_SGNS
153 | column_group: *EN
154 | rank: 1
155 | numeric_format: 3
156 | sort: asc
157 | cos_en_sgns:
158 | label: Cos
159 | leaderboard: *REVDICT_SGNS
160 | column_group: *EN
161 | rank: 2
162 | numeric_format: 3
163 | rnk_en_sgns:
164 | label: Rank
165 | leaderboard: *REVDICT_SGNS
166 | column_group: *EN
167 | rank: 3
168 | numeric_format: 3
169 | sort: asc
170 | MSE_es_sgns:
171 | label: MSE
172 | leaderboard: *REVDICT_SGNS
173 | column_group: *ES
174 | rank: 1
175 | numeric_format: 3
176 | sort: asc
177 | cos_es_sgns:
178 | label: Cos
179 | leaderboard: *REVDICT_SGNS
180 | column_group: *ES
181 | rank: 2
182 | numeric_format: 3
183 | rnk_es_sgns:
184 | label: Rank
185 | leaderboard: *REVDICT_SGNS
186 | column_group: *ES
187 | rank: 3
188 | numeric_format: 3
189 | sort: asc
190 | MSE_fr_sgns:
191 | label: MSE
192 | leaderboard: *REVDICT_SGNS
193 | column_group: *FR
194 | rank: 1
195 | numeric_format: 3
196 | sort: asc
197 | cos_fr_sgns:
198 | label: Cos
199 | leaderboard: *REVDICT_SGNS
200 | column_group: *FR
201 | rank: 2
202 | numeric_format: 3
203 | rnk_fr_sgns:
204 | label: Rank
205 | leaderboard: *REVDICT_SGNS
206 | column_group: *FR
207 | rank: 3
208 | numeric_format: 3
209 | sort: asc
210 | MSE_it_sgns:
211 | label: MSE
212 | leaderboard: *REVDICT_SGNS
213 | column_group: *IT
214 | rank: 1
215 | numeric_format: 3
216 | sort: asc
217 | cos_it_sgns:
218 | label: Cos
219 | leaderboard: *REVDICT_SGNS
220 | column_group: *IT
221 | rank: 2
222 | numeric_format: 3
223 | rnk_it_sgns:
224 | label: Rank
225 | leaderboard: *REVDICT_SGNS
226 | column_group: *IT
227 | rank: 3
228 | numeric_format: 3
229 | sort: asc
230 | MSE_ru_sgns:
231 | label: MSE
232 | leaderboard: *REVDICT_SGNS
233 | column_group: *RU
234 | rank: 1
235 | numeric_format: 3
236 | sort: asc
237 | cos_ru_sgns:
238 | label: Cos
239 | leaderboard: *REVDICT_SGNS
240 | column_group: *RU
241 | rank: 2
242 | numeric_format: 3
243 | rnk_ru_sgns:
244 | label: Rank
245 | leaderboard: *REVDICT_SGNS
246 | column_group: *RU
247 | rank: 3
248 | numeric_format: 3
249 | sort: asc
250 | MSE_en_electra:
251 | label: MSE
252 | leaderboard: *REVDICT_ELECTRA
253 | column_group: *EN
254 | rank: 1
255 | numeric_format: 3
256 | sort: asc
257 | cos_en_electra:
258 | label: Cos
259 | leaderboard: *REVDICT_ELECTRA
260 | column_group: *EN
261 | rank: 2
262 | numeric_format: 3
263 | rnk_en_electra:
264 | label: Rank
265 | leaderboard: *REVDICT_ELECTRA
266 | column_group: *EN
267 | rank: 3
268 | numeric_format: 3
269 | sort: asc
270 | MSE_fr_electra:
271 | label: MSE
272 | leaderboard: *REVDICT_ELECTRA
273 | column_group: *FR
274 | rank: 1
275 | numeric_format: 3
276 | sort: asc
277 | cos_fr_electra:
278 | label: Cos
279 | leaderboard: *REVDICT_ELECTRA
280 | column_group: *FR
281 | rank: 2
282 | numeric_format: 3
283 | rnk_fr_electra:
284 | label: Rank
285 | leaderboard: *REVDICT_ELECTRA
286 | column_group: *FR
287 | rank: 3
288 | numeric_format: 3
289 | sort: asc
290 | MSE_ru_electra:
291 | label: MSE
292 | leaderboard: *REVDICT_ELECTRA
293 | column_group: *RU
294 | rank: 1
295 | numeric_format: 3
296 | sort: asc
297 | cos_ru_electra:
298 | label: Cos
299 | leaderboard: *REVDICT_ELECTRA
300 | column_group: *RU
301 | rank: 2
302 | numeric_format: 3
303 | rnk_ru_electra:
304 | label: Rank
305 | leaderboard: *REVDICT_ELECTRA
306 | column_group: *RU
307 | rank: 3
308 | numeric_format: 3
309 | sort: asc
310 | MSE_en_char:
311 | label: MSE
312 | leaderboard: *REVDICT_CHAR
313 | column_group: *EN
314 | rank: 1
315 | numeric_format: 3
316 | sort: asc
317 | cos_en_char:
318 | label: Cos
319 | leaderboard: *REVDICT_CHAR
320 | column_group: *EN
321 | rank: 2
322 | numeric_format: 3
323 | rnk_en_char:
324 | label: Rank
325 | leaderboard: *REVDICT_CHAR
326 | column_group: *EN
327 | rank: 3
328 | numeric_format: 3
329 | sort: asc
330 | MSE_es_char:
331 | label: MSE
332 | leaderboard: *REVDICT_CHAR
333 | column_group: *ES
334 | rank: 1
335 | numeric_format: 3
336 | sort: asc
337 | cos_es_char:
338 | label: Cos
339 | leaderboard: *REVDICT_CHAR
340 | column_group: *ES
341 | rank: 2
342 | numeric_format: 3
343 | rnk_es_char:
344 | label: Rank
345 | leaderboard: *REVDICT_CHAR
346 | column_group: *ES
347 | rank: 3
348 | numeric_format: 3
349 | sort: asc
350 | MSE_fr_char:
351 | label: MSE
352 | leaderboard: *REVDICT_CHAR
353 | column_group: *FR
354 | rank: 1
355 | numeric_format: 3
356 | sort: asc
357 | cos_fr_char:
358 | label: Cos
359 | leaderboard: *REVDICT_CHAR
360 | column_group: *FR
361 | rank: 2
362 | numeric_format: 3
363 | rnk_fr_char:
364 | label: Rank
365 | leaderboard: *REVDICT_CHAR
366 | column_group: *FR
367 | rank: 3
368 | numeric_format: 3
369 | sort: asc
370 | MSE_it_char:
371 | label: MSE
372 | leaderboard: *REVDICT_CHAR
373 | column_group: *IT
374 | rank: 1
375 | numeric_format: 3
376 | sort: asc
377 | cos_it_char:
378 | label: Cos
379 | leaderboard: *REVDICT_CHAR
380 | column_group: *IT
381 | rank: 2
382 | numeric_format: 3
383 | rnk_it_char:
384 | label: Rank
385 | leaderboard: *REVDICT_CHAR
386 | column_group: *IT
387 | rank: 3
388 | numeric_format: 3
389 | sort: asc
390 | MSE_ru_char:
391 | label: MSE
392 | leaderboard: *REVDICT_CHAR
393 | column_group: *RU
394 | rank: 1
395 | numeric_format: 3
396 | sort: asc
397 | cos_ru_char:
398 | label: Cos
399 | leaderboard: *REVDICT_CHAR
400 | column_group: *RU
401 | rank: 2
402 | numeric_format: 3
403 | rnk_ru_char:
404 | label: Rank
405 | leaderboard: *REVDICT_CHAR
406 | column_group: *RU
407 | rank: 3
408 | numeric_format: 3
409 | sort: asc
410 |
--------------------------------------------------------------------------------
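The leaderboard configuration above relies on YAML anchors (`&NAME`) and aliases (`*NAME`) so that every column can point back at a shared leaderboard or column-group definition. A minimal sketch of the mechanism, assuming PyYAML is available:

```python
# Minimal sketch: a YAML alias resolves to the very object its anchor defined.
import yaml  # assumption: PyYAML is installed

doc = yaml.safe_load(
    """
leaderboards:
  DEFMOD: &DEFMOD
    label: Definition Modeling
columns:
  MoverScore_en:
    leaderboard: *DEFMOD
"""
)
# the column's leaderboard entry is the same dict as the anchored definition
assert doc["columns"]["MoverScore_en"] is doc["leaderboards"]["DEFMOD"]
```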
/baseline_archs/code/data.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from itertools import count
3 | import json
4 | import random
5 | import tempfile
6 |
7 | import torch
8 | from torch.nn.utils.rnn import pad_sequence
9 | from torch.utils.data import DataLoader, Dataset, Sampler
10 |
11 | import sentencepiece as spm
12 |
13 | BOS = "<seq>"
14 | EOS = "</seq>"
15 | PAD = "<pad/>"
16 | UNK = "<unk/>"
17 |
18 | SUPPORTED_ARCHS = ("sgns", "char")
19 |
20 | # A dataset is a container object for the actual data
21 | class JSONDataset(Dataset):
22 | """Reads a CODWOE JSON dataset"""
23 |
24 | def __init__(
25 | self,
26 | file,
27 | vocab=None,
28 | freeze_vocab=False,
29 | maxlen=256,
30 | spm_model_name=None,
31 | train_spm=False,
32 | ):
33 | """
34 | Construct a torch.utils.data.Dataset compatible with torch data API and
35 | codwoe data.
36 | args: `file` the path to the dataset file
37 | `vocab` a dictionary mapping strings to indices
38 | `freeze_vocab` if True, do not update the vocabulary; unknown items map to the OOV token
39 | `maxlen` the maximum number of tokens per gloss
40 | `spm_model_name` create and use this sentencepiece model instead of whitespace tokenization (trained first when `train_spm` is True)
41 | """
42 | self.use_spm = spm_model_name is not None
43 | if vocab is None:
44 | self.vocab = defaultdict(count().__next__)
45 | else:
46 | self.vocab = defaultdict(count(len(vocab)).__next__)
47 | self.vocab.update(vocab)
48 | pad, eos, bos, unk = (
49 | self.vocab[PAD],
50 | self.vocab[EOS],
51 | self.vocab[BOS],
52 | self.vocab[UNK],
53 | )
54 | if freeze_vocab:
55 | self.vocab = dict(vocab)
56 | with open(file, "r") as istr:
57 | self.items = json.load(istr)
58 | if self.use_spm:
59 | if train_spm:
60 | with tempfile.NamedTemporaryFile(mode="w+") as temp_fp:
61 | for gls in (j["gloss"] for j in self.items):
62 | print(gls, file=temp_fp)
63 | temp_fp.seek(0)
64 | spm.SentencePieceTrainer.train(
65 | input=temp_fp.name,
66 | model_prefix=spm_model_name,
67 | vocab_size=15000,
68 | pad_id=pad,
69 | pad_piece=PAD,
70 | eos_id=eos,
71 | eos_piece=EOS,
72 | bos_id=bos,
73 | bos_piece=BOS,
74 | unk_id=unk,
75 | unk_piece=UNK,
76 | )
77 | self.spm_model = spm.SentencePieceProcessor(
78 | model_file=f"{spm_model_name}.model"
79 | )
80 | # preparse data
81 | for json_dict in self.items:
82 | # in definition modeling test datasets, gloss targets are absent
83 | if "gloss" in json_dict:
84 | if spm_model_name:
85 | json_dict["gloss_tensor"] = torch.tensor(
86 | self.spm_model.encode(
87 | json_dict["gloss"], add_eos=True, add_bos=True
88 | )
89 | )
90 | else:
91 | json_dict["gloss_tensor"] = torch.tensor(
92 | [bos]
93 | + [
94 | self.vocab[word]
95 | if not freeze_vocab
96 | else self.vocab.get(word, unk)
97 | for word in json_dict["gloss"].split()
98 | ]
99 | + [eos]
100 | )
101 | if maxlen:
102 | json_dict["gloss_tensor"] = json_dict["gloss_tensor"][:maxlen]
103 | # in reverse dictionary test datasets, vector targets are absent
104 | for arch in SUPPORTED_ARCHS:
105 | if arch in json_dict:
106 | json_dict[f"{arch}_tensor"] = torch.tensor(json_dict[arch])
107 | if "electra" in json_dict:
108 | json_dict["electra_tensor"] = torch.tensor(json_dict["electra"])
109 | if self.use_spm:
110 | self.vocab = {
111 | self.spm_model.id_to_piece(idx): idx
112 | for idx in range(self.spm_model.get_piece_size())
113 | }
114 |
115 | self.has_gloss = "gloss" in self.items[0]
116 | self.has_vecs = SUPPORTED_ARCHS[0] in self.items[0]
117 | self.has_electra = "electra" in self.items[0]
118 | self.itos = sorted(self.vocab, key=lambda w: self.vocab[w])
119 |
120 | def __len__(self):
121 | return len(self.items)
122 |
123 | def __getitem__(self, index):
124 | return self.items[index]
125 |
126 | # we're adding this method to simplify the code in our predictions of
127 | # glosses
128 | @torch.no_grad()
129 | def decode(self, tensor):
130 | """Convert a sequence of indices (possibly batched) to tokens"""
131 | if tensor.dim() == 2:
132 | # we have batched tensors of shape [Seq x Batch]
133 | decoded = []
134 | for tensor_ in tensor.t():
135 | decoded.append(self.decode(tensor_))
136 | return decoded
137 | else:
138 | ids = [i.item() for i in tensor if i != self.vocab[PAD]]
139 | if self.itos[ids[0]] == BOS: ids = ids[1:]
140 | if self.itos[ids[-1]] == EOS: ids = ids[:-1]
141 | if self.use_spm:
142 | return self.spm_model.decode(ids)
143 | return " ".join(self.itos[i] for i in ids)
144 |
145 | def save(self, file):
146 | torch.save(self, file)
147 |
148 | @staticmethod
149 | def load(file):
150 | return torch.load(file)
151 |
152 |
153 | # A sampler allows you to define how to select items from your Dataset. Torch
154 | # provides a number of default Sampler classes
155 | class TokenSampler(Sampler):
156 | """Produce batches with up to `batch_size` tokens in each batch"""
157 |
158 | def __init__(
159 | self, dataset, batch_size=150, size_fn=len, drop_last=False, shuffle=True
160 | ):
161 | """
162 | args: `dataset` a torch.utils.data.Dataset (iterable style)
163 | `batch_size` the maximum number of tokens in a batch
164 | `size_fn` a callable that yields the number of tokens in a dataset item
165 | `drop_last` if True, drop the last batch when the data does not divide evenly into batches
166 | `shuffle` if True, reshuffle the data at every iteration
167 | """
168 | self.dataset = dataset
169 | self.batch_size = batch_size
170 | self.size_fn = size_fn
171 | self._len = None
172 | self.drop_last = drop_last
173 | self.shuffle = shuffle
174 |
175 | def __iter__(self):
176 | indices = range(len(self.dataset))
177 | if self.shuffle:
178 | indices = list(indices)
179 | random.shuffle(indices)
180 | i = 0
181 | selected = []
182 | numel = 0
183 | longest_len = 0
184 | for i in indices:
185 | if numel + self.size_fn(self.dataset[i]) > self.batch_size:
186 | if selected:
187 | yield selected
188 | selected = []
189 | numel = 0
190 | numel += self.size_fn(self.dataset[i])
191 | selected.append(i)
192 | if selected and not self.drop_last:
193 | yield selected
194 |
195 | def __len__(self):
196 | if self._len is None:
197 | self._len = round(
198 | sum(self.size_fn(self.dataset[i]) for i in range(len(self.dataset)))
199 | / self.batch_size
200 | )
201 | return self._len
202 |
203 |
204 | # DataLoaders give access to an iterator over the dataset, using a sampling
205 | # strategy as defined through a Sampler.
206 | def get_dataloader(dataset, batch_size=200, shuffle=True):
207 | """produce dataloader.
208 | args: `dataset` a torch.utils.data.Dataset (iterable style)
209 | `batch_size` the maximum number of tokens in a batch
210 | `shuffle` if True, reshuffle the data at every iteration
211 | """
212 | # some constants for the closures
213 | has_gloss = dataset.has_gloss
214 | has_vecs = dataset.has_vecs
215 | has_electra = dataset.has_electra
216 | PAD_idx = dataset.vocab[PAD]
217 |
218 | # the collate function has to convert a list of dataset items into a batch
219 | def do_collate(json_dicts):
220 | """collates example into a dict batch; produces ands pads tensors"""
221 | batch = defaultdict(list)
222 | for jdict in json_dicts:
223 | for key in jdict:
224 | batch[key].append(jdict[key])
225 | if has_gloss:
226 | batch["gloss_tensor"] = pad_sequence(
227 | batch["gloss_tensor"], padding_value=PAD_idx, batch_first=False
228 | )
229 | if has_vecs:
230 | for arch in SUPPORTED_ARCHS:
231 | batch[f"{arch}_tensor"] = torch.stack(batch[f"{arch}_tensor"])
232 | if has_electra:
233 | batch["electra_tensor"] = torch.stack(batch["electra_tensor"])
234 | return dict(batch)
235 |
236 | if dataset.has_gloss:
237 | # we try to keep the amount of gloss tokens roughly constant across all
238 | # batches.
239 | def do_size_item(item):
240 | """retrieve tensor size, so as to batch items per elements"""
241 | return item["gloss_tensor"].numel()
242 |
243 | return DataLoader(
244 | dataset,
245 | collate_fn=do_collate,
246 | batch_sampler=TokenSampler(
247 | dataset, batch_size=batch_size, size_fn=do_size_item, shuffle=shuffle
248 | ),
249 | )
250 | else:
251 | # there's no gloss, hence no gloss tokens, so we use a default batching
252 | # strategy.
253 | return DataLoader(
254 | dataset, collate_fn=do_collate, batch_size=batch_size, shuffle=shuffle
255 | )
256 |
257 |
258 | def get_train_dataset(train_file, spm_model_path, save_dir):
259 | if (save_dir / "train_dataset.pt").is_file():
260 | dataset = JSONDataset.load(save_dir / "train_dataset.pt")
261 | else:
262 | dataset = JSONDataset(
263 | train_file,
264 | spm_model_name=spm_model_path.with_suffix("") if spm_model_path else None,
265 | train_spm=bool(spm_model_path) and not spm_model_path.with_suffix(".model").is_file(),
266 | )
267 | dataset.save(save_dir / "train_dataset.pt")
268 | return dataset
269 |
270 |
271 | def get_dev_dataset(dev_file, spm_model_path, save_dir, train_dataset=None):
272 | if (save_dir / "dev_dataset.pt").is_file():
273 | dataset = JSONDataset.load(save_dir / "dev_dataset.pt")
274 | else:
275 | dataset = JSONDataset(
276 | dev_file, spm_model_name=spm_model_path.with_suffix("") if spm_model_path else None, train_spm=False
277 | )
278 | dataset.save(save_dir / "dev_dataset.pt")
279 | return dataset
280 |
--------------------------------------------------------------------------------
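For orientation, here is a minimal usage sketch of the module above. The dataset path is a placeholder (any CODWOE JSON file with glosses will do), and the snippet must run from `baseline_archs/code` so that `import data` resolves:

```python
# Minimal usage sketch for data.py above; "en.train.json" is a placeholder.
import data

train_set = data.JSONDataset("en.train.json")
loader = data.get_dataloader(train_set, batch_size=200)
batch = next(iter(loader))
# glosses come out padded as a [Seq x Batch] tensor of token indices
print(batch["gloss_tensor"].shape)
# embeddings, when present, are stacked into [Batch x Dim] tensors per arch
if train_set.has_vecs:
    print(batch["sgns_tensor"].shape)
```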
/baseline_archs/code/revdict.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import json
4 | import logging
5 | import pathlib
6 | import pprint
7 | import secrets
8 |
9 | import skopt
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | import torch.optim as optim
15 | from torch.utils.tensorboard import SummaryWriter
16 |
17 | import tqdm
18 |
19 | import data
20 | import models
21 |
22 | logger = logging.getLogger(pathlib.Path(__file__).name)
23 | logger.setLevel(logging.DEBUG)
24 | handler = logging.StreamHandler(tqdm.tqdm)
25 | handler.terminator = ""
26 | handler.setFormatter(
27 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
28 | )
29 | logger.addHandler(handler)
30 |
31 |
32 | def get_parser(
33 | parser=argparse.ArgumentParser(
34 | description="Run a reverse dictionary baseline.\nThe task consists in reconstructing an embedding from the glosses listed in the datasets"
35 | ),
36 | ):
37 | parser.add_argument(
38 | "--do_htune",
39 | action="store_true",
40 | help="whether to perform hyperparameter tuning",
41 | )
42 | parser.add_argument(
43 | "--do_train", action="store_true", help="whether to train a model from scratch"
44 | )
45 | parser.add_argument(
46 | "--do_pred", action="store_true", help="whether to produce predictions"
47 | )
48 | parser.add_argument(
49 | "--train_file", type=pathlib.Path, help="path to the train file"
50 | )
51 | parser.add_argument("--dev_file", type=pathlib.Path, help="path to the dev file")
52 | parser.add_argument("--test_file", type=pathlib.Path, help="path to the test file")
53 | parser.add_argument(
54 | "--device",
55 | type=torch.device,
56 | default=torch.device("cpu"),
57 | help="path to the train file",
58 | )
59 | parser.add_argument(
60 | "--target_arch",
61 | type=str,
62 | default="sgns",
63 | choices=("sgns", "char", "electra"),
64 | help="embedding architecture to use as target",
65 | )
66 | parser.add_argument(
67 | "--summary_logdir",
68 | type=pathlib.Path,
69 | default=pathlib.Path("logs") / "revdict-baseline",
70 | help="write logs for future analysis",
71 | )
72 | parser.add_argument(
73 | "--save_dir",
74 | type=pathlib.Path,
75 | default=pathlib.Path("models") / "revdict-baseline",
76 | help="where to save model & vocab",
77 | )
78 | parser.add_argument(
79 | "--spm_model_path",
80 | type=pathlib.Path,
81 | default=None,
82 | help="use sentencepiece model, if required train and save it here",
83 | )
84 | parser.add_argument(
85 | "--pred_file",
86 | type=pathlib.Path,
87 | default=pathlib.Path("revdict-baseline-preds.json"),
88 | help="where to save predictions",
89 | )
90 | return parser
91 |
92 |
93 | def get_search_space():
94 | """get hyperparmeters to optimize for"""
95 | search_space = [
96 | skopt.space.Real(1e-8, 1.0, "log-uniform", name="learning_rate"),
97 | skopt.space.Real(0.0, 1.0, "uniform", name="weight_decay"),
98 | skopt.space.Real(0.9, 1.0 - 1e-8, "log-uniform", name="beta_a"),
99 | skopt.space.Real(0.9, 1.0 - 1e-8, "log-uniform", name="beta_b"),
100 | skopt.space.Real(0.0, 0.9, "uniform", name="dropout"),
101 | skopt.space.Real(0.0, 1.0, "uniform", name="warmup_len"),
102 | skopt.space.Integer(1, 100, "log-uniform", name="batch_accum"),
103 | skopt.space.Integer(0, 5, "uniform", name="n_head_pow"),
104 | skopt.space.Integer(1, 6, "uniform", name="n_layers"),
105 | ]
106 | return search_space
107 |
108 |
109 | def train(
110 | train_file,
111 | dev_file,
112 | target_arch="sgns",
113 | summary_logdir=pathlib.Path("logs") / "revdict-htune",
114 | save_dir=pathlib.Path("models") / "revdict-baseline",
115 | device="cuda:0",
116 | spm_model_path=None,
117 | epochs=100,
118 | learning_rate=1e-4,
119 | beta1=0.9,
120 | beta2=0.999,
121 | weight_decay=1e-6,
122 | patience=5,
123 | batch_accum=1,
124 | dropout=0.3,
125 | warmup_len=0.1,
126 | n_head=4,
127 | n_layers=4,
128 | ):
129 | assert train_file is not None, "Missing dataset for training"
130 | assert dev_file is not None, "Missing dataset for development"
131 | # 1. get data, vocabulary, summary writer
132 | logger.debug("Preloading data")
133 | save_dir = save_dir / target_arch
134 | save_dir.mkdir(parents=True, exist_ok=True)
135 | ## make datasets
136 | train_dataset = data.get_train_dataset(train_file, spm_model_path, save_dir)
137 | dev_dataset = data.get_dev_dataset(
138 | dev_file, spm_model_path, save_dir, train_dataset
139 | )
140 |
141 | ## assert they correspond to the task
142 | assert train_dataset.has_gloss, "Training dataset contains no gloss."
143 | if target_arch == "electra":
144 | assert train_dataset.has_electra, "Training dataset contains no vector."
145 | else:
146 | assert train_dataset.has_vecs, "Training dataset contains no vector."
147 | assert dev_dataset.has_gloss, "Development dataset contains no gloss."
148 | if target_arch == "electra":
149 | assert dev_dataset.has_electra, "Development dataset contains no vector."
150 | else:
151 | assert dev_dataset.has_vecs, "Development dataset contains no vector."
152 | ## make dataloader
153 | train_dataloader = data.get_dataloader(train_dataset)
154 | dev_dataloader = data.get_dataloader(dev_dataset, shuffle=False)
155 | ## make summary writer
156 | summary_writer = SummaryWriter(summary_logdir)
157 | train_step = itertools.count() # to keep track of the training steps for logging
158 |
159 | # 2. construct model
160 | ## Hyperparams
161 | logger.debug("Setting up training environment")
162 | model = models.RevdictModel(
163 | dev_dataset.vocab, n_head=n_head, n_layers=n_layers, dropout=dropout
164 | )
165 | model = model.to(device)
166 | model.train()
167 |
168 | # 3. declare optimizer & criterion
169 | ## Hyperparams
170 | optimizer = optim.AdamW(
171 | model.parameters(),
172 | lr=learning_rate,
173 | betas=(beta1, beta2),
174 | weight_decay=weight_decay,
175 | )
176 | criterion = nn.MSELoss()
177 |
178 | vec_tensor_key = f"{target_arch}_tensor"
179 | best_mse = float("inf")
180 | strikes = 0
181 |
182 | # 4. train model
183 | epochs_range = tqdm.trange(epochs, desc="Epochs")
184 | total_steps = (len(train_dataloader) * epochs) // batch_accum
185 | scheduler = models.get_schedule(
186 | optimizer, round(total_steps * warmup_len), total_steps
187 | )
188 |
189 |
190 | for epoch in epochs_range:
191 | ## train loop
192 | pbar = tqdm.tqdm(
193 | desc=f"Train {epoch}", total=len(train_dataset), disable=None, leave=False
194 | )
195 | optimizer.zero_grad()
196 | for i, batch in enumerate(train_dataloader):
197 | optimizer.zero_grad()
198 | gls = batch["gloss_tensor"].to(device)
199 | vec = batch[vec_tensor_key].to(device)
200 | pred = model(gls)
201 | loss = criterion(pred, vec)
202 | loss.backward()
203 | grad_remains = True
204 | step = next(train_step)
205 | if i % batch_accum == 0:
206 | optimizer.step()
207 | scheduler.step()
208 | optimizer.zero_grad()
209 | grad_remains = False
210 | summary_writer.add_scalar(
211 | "revdict-train/lr", scheduler.get_last_lr()[0], step
212 | )
213 | # keep track of the train loss for this step
214 | with torch.no_grad():
215 | cos_sim = F.cosine_similarity(pred, vec).mean().item()
216 | summary_writer.add_scalar("revdict-train/cos", cos_sim, step)
217 | summary_writer.add_scalar("revdict-train/mse", loss.item(), step)
218 | pbar.update(vec.size(0))
219 | if grad_remains:
220 | optimizer.step()
221 | scheduler.step()
222 | optimizer.zero_grad()
223 | pbar.close()
224 | ## eval loop
225 | model.eval()
226 | with torch.no_grad():
227 | sum_dev_loss = 0.0
228 | sum_cosine = 0.0
229 | pbar = tqdm.tqdm(
230 | desc=f"Eval {epoch}",
231 | total=len(dev_dataset),
232 | disable=None,
233 | leave=False,
234 | )
235 | for batch in dev_dataloader:
236 | gls = batch["gloss_tensor"].to(device)
237 | vec = batch[vec_tensor_key].to(device)
238 | pred = model(gls)
239 | sum_dev_loss += (
240 | F.mse_loss(pred, vec, reduction="none").mean(1).sum().item()
241 | )
242 | sum_cosine += F.cosine_similarity(pred, vec).sum().item()
243 | pbar.update(vec.size(0))
244 | # keep track of the average loss on dev set for this epoch
245 | new_mse = sum_dev_loss / len(dev_dataset)
246 | summary_writer.add_scalar(
247 | "revdict-dev/cos", sum_cosine / len(dev_dataset), epoch
248 | )
249 | summary_writer.add_scalar("revdict-dev/mse", new_mse, epoch)
250 | pbar.close()
251 | if new_mse < (best_mse * 0.999):
252 | logger.debug(
253 | f"Epoch {epoch}, new best loss: {new_mse:.4f} < {best_mse:.4f}"
254 | + f" (x 0.999 = {best_mse * 0.999:.4f})"
255 | )
256 | best_mse = new_mse
257 | strikes = 0
258 | else:
259 | strikes += 1
260 | # retrieve the overall best score recorded so far
261 | if not (save_dir / "best_scores.txt").is_file():
262 | overall_best_mse = float("inf")
263 | else:
264 | with open(save_dir / "best_scores.txt", "r") as score_file:
265 | overall_best_mse = float(score_file.read())
266 | # save result if better
267 | if new_mse < overall_best_mse:
268 | logger.debug(
269 | f"Epoch {epoch}, new overall best loss: {new_mse:.4f} < {overall_best_mse:.4f}"
270 | )
271 | model.save(save_dir / "model.pt")
272 | with open(save_dir / "hparams.json", "w") as json_file:
273 | hparams = {
274 | "learning_rate": learning_rate,
275 | "beta1": beta1,
276 | "beta2": beta2,
277 | "weight_decay": weight_decay,
278 | }
279 | json.dump(hparams, json_file, indent=2)
280 | with open(save_dir / "best_scores.txt", "w") as score_file:
281 | print(new_mse, file=score_file)
282 | if strikes >= patience:
283 | logger.debug("Stopping early.")
284 | epochs_range.close()
285 | break
286 | model.train()
287 | # return loss for gp minimize
288 | return best_mse
289 |
290 |
291 | def pred(args):
292 | assert args.test_file is not None, "Missing dataset for test"
293 | # 1. retrieve vocab, dataset, model
294 | model = models.RevdictModel.load(args.save_dir / "model.pt")
295 | train_vocab = data.JSONDataset.load(args.save_dir / "train_dataset.pt").vocab
296 | test_dataset = data.JSONDataset(
297 | args.test_file, vocab=train_vocab, freeze_vocab=True, maxlen=model.maxlen, spm_model_name=args.spm_model_path
298 | )
299 | test_dataloader = data.get_dataloader(test_dataset, shuffle=False, batch_size=1024)
300 | model.eval()
301 | vec_tensor_key = f"{args.target_arch}_tensor"
302 | assert test_dataset.has_gloss, "File is not usable for the task"
303 | # 2. make predictions
304 | predictions = []
305 | with torch.no_grad():
306 | pbar = tqdm.tqdm(desc="Pred.", total=len(test_dataset))
307 | for batch in test_dataloader:
308 | vecs = model(batch["gloss_tensor"].to(args.device)).cpu()
309 | for id, vec in zip(batch["id"], vecs.unbind()):
310 | predictions.append(
311 | {"id": id, args.target_arch: vec.view(-1).cpu().tolist()}
312 | )
313 | pbar.update(vecs.size(0))
314 | pbar.close()
315 | with open(args.pred_file, "w") as ostr:
316 | json.dump(predictions, ostr)
317 |
318 |
319 | def main(args):
320 | assert not (args.do_train and args.do_htune), "Conflicting options"
321 |
322 | if args.do_train:
323 | logger.debug("Performing revdict training")
324 | train(
325 | args.train_file,
326 | args.dev_file,
327 | args.target_arch,
328 | args.summary_logdir,
329 | args.save_dir,
330 | args.device,
331 | )
332 | elif args.do_htune:
333 | logger.debug("Performing revdict hyperparameter tuning")
334 | search_space = get_search_space()
335 |
336 | @skopt.utils.use_named_args(search_space)
337 | def gp_train(**hparams):
338 | logger.debug(f"Hyperparams sampled:\n{pprint.pformat(hparams)}")
339 | best_loss = train(
340 | train_file=args.train_file,
341 | dev_file=args.dev_file,
342 | target_arch=args.target_arch,
343 | summary_logdir=args.summary_logdir
344 | / args.target_arch
345 | / secrets.token_urlsafe(8),
346 | save_dir=args.save_dir,
347 | device=args.device,
348 | spm_model_path=args.spm_model_path,
349 | learning_rate=hparams["learning_rate"],
350 | beta1=min(hparams["beta_a"], hparams["beta_b"]),
351 | beta2=max(hparams["beta_a"], hparams["beta_b"]),
352 | weight_decay=hparams["weight_decay"],
353 | batch_accum=hparams["batch_accum"],
354 | warmup_len=hparams["warmup_len"],
355 | n_head=2 ** hparams["n_head_pow"],
356 | n_layers=hparams["n_layers"],
357 | )
358 | return best_loss
359 |
360 | result = skopt.gp_minimize(gp_train, search_space)
361 | args.save_dir = args.save_dir / args.target_arch
362 | skopt.dump(result, args.save_dir / "results.pkl", store_objective=False)
363 |
364 | if args.do_pred:
365 | logger.debug("Performing revdict prediction")
366 | pred(args)
367 |
368 |
369 | if __name__ == "__main__":
370 | args = get_parser().parse_args()
371 | main(args)
372 |
--------------------------------------------------------------------------------
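To show how the pieces above fit together, here is a minimal sketch of a training run driven through the same parser the CLI uses; all file paths are placeholders:

```python
# Minimal sketch: run the reverse dictionary baseline programmatically,
# mirroring `python3 revdict.py --do_train ...`. Paths are placeholders.
import revdict

args = revdict.get_parser().parse_args(
    [
        "--do_train",
        "--train_file", "en.train.json",
        "--dev_file", "en.dev.json",
        "--target_arch", "sgns",
    ]
)
revdict.main(args)  # trains on CPU by default; pass --device cuda:0 for GPU
```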
/baseline_archs/code/models.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 | import data
8 |
9 |
10 | def get_schedule(
11 | optimizer, num_warmup_steps, num_training_steps, num_cycles=0.5, last_epoch=-1
12 | ):
13 | """From Huggingface"""
14 |
15 | def lr_lambda(current_step):
16 | if current_step < num_warmup_steps:
17 | return float(current_step) / float(max(1, num_warmup_steps))
18 | progress = float(current_step - num_warmup_steps) / float(
19 | max(1, num_training_steps - num_warmup_steps)
20 | )
21 | return max(
22 | 0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
23 | )
24 |
25 | return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch)
26 |
27 |
28 | class PositionalEncoding(nn.Module):
29 | """From PyTorch"""
30 |
31 | def __init__(self, d_model, dropout=0.1, max_len=4096):
32 | super(PositionalEncoding, self).__init__()
33 | self.dropout = nn.Dropout(p=dropout)
34 | pe = torch.zeros(max_len, d_model)
35 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
36 | div_term = torch.exp(
37 | torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
38 | )
39 | pe[:, 0::2] = torch.sin(position * div_term)
40 | pe[:, 1::2] = torch.cos(position * div_term)
41 | pe = pe.unsqueeze(0).transpose(0, 1)
42 | self.register_buffer("pe", pe)
43 |
44 | def forward(self, x):
45 | x = x + self.pe[: x.size(0)]
46 | return self.dropout(x)
47 |
48 |
49 | class DefmodModel(nn.Module):
50 | """A transformer architecture for Definition Modeling."""
51 |
52 | def __init__(
53 | self, vocab, d_model=256, n_head=4, n_layers=4, dropout=0.3, maxlen=256
54 | ):
55 | super(DefmodModel, self).__init__()
56 | self.d_model = d_model
57 | self.padding_idx = vocab[data.PAD]
58 | self.eos_idx = vocab[data.EOS]
59 | self.maxlen = maxlen
60 |
61 | self.embedding = nn.Embedding(len(vocab), d_model, padding_idx=self.padding_idx)
62 | self.positional_encoding = PositionalEncoding(
63 | d_model, dropout=dropout, max_len=maxlen
64 | )
65 | encoder_layer = nn.TransformerEncoderLayer(
66 | d_model=d_model, nhead=n_head, dropout=dropout, dim_feedforward=d_model * 2
67 | )
68 | self.transformer_encoder = nn.TransformerEncoder(
69 | encoder_layer, num_layers=n_layers
70 | )
71 | self.v_proj = nn.Linear(d_model, len(vocab))
72 | # initializing weights
73 | for name, param in self.named_parameters():
74 | if param.dim() > 1:
75 | nn.init.xavier_uniform_(param)
76 | elif "bias" in name:
77 | nn.init.zeros_(param)
78 | else: # gain parameters of the layer norm
79 | nn.init.ones_(param)
80 |
81 | def generate_square_subsequent_mask(self, sz):
82 | "from Pytorch"
83 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
84 | mask = (
85 | mask.float()
86 | .masked_fill(mask == 0, float("-inf"))
87 | .masked_fill(mask == 1, float(0.0))
88 | )
89 | return mask
90 |
91 | def forward(self, vector, input_sequence=None):
92 | device = next(self.parameters()).device
93 | embs = self.embedding(input_sequence)
94 | seq = torch.cat([vector.unsqueeze(0), embs], dim=0)
95 | src = self.positional_encoding(seq)
96 | src_mask = self.generate_square_subsequent_mask(src.size(0)).to(device)
97 | src_key_padding_mask = torch.cat(
98 | [
99 | torch.tensor([[False] * input_sequence.size(1)]).to(device),
100 | (input_sequence == self.padding_idx),
101 | ],
102 | dim=0,
103 | ).t()
104 | transformer_output = self.transformer_encoder(
105 | src, mask=src_mask, src_key_padding_mask=src_key_padding_mask
106 | )
107 | v_dist = self.v_proj(transformer_output)
108 | return v_dist
109 |
110 | @staticmethod
111 | def load(file):
112 | return torch.load(file)
113 |
114 | def save(self, file):
115 | file.parent.mkdir(exist_ok=True, parents=True)
116 | torch.save(self, file)
117 |
118 | @torch.no_grad()
119 | def pred(self, vector, decode_fn=None, beam_size=64, verbose=False):
120 | # which device we should cast our variables to
121 | device = next(self.parameters()).device
122 |
123 | # how many examples are batched together
124 | batch_size = vector.size(0)
125 |
126 | # Tensors will have this shape:
127 | # [Sequence, Batch, Beam, Continuation, *]
128 |
129 | # accumulation variable, keeping track of the best beams for each batched example
130 | generated_symbols = torch.zeros(0, batch_size, beam_size, dtype=torch.long).to(device)
131 |
132 | # which beams hold a completed sequence
133 | current_beam_size = 1
134 | has_stopped = torch.tensor([False] * (batch_size * current_beam_size)).to(device)
135 |
136 | # the input to kick-start the generation is the embedding, we start with the same input for each beam
137 | vector_src = vector.unsqueeze(1).expand(batch_size, current_beam_size, -1).reshape(1, batch_size * current_beam_size, -1)
138 | src = vector_src
139 | src_key_padding_mask = torch.tensor([[False] * (batch_size * current_beam_size)]).to(device)
140 |
141 | # variables needed to compute the score of each beam (geometric mean of probability of emission)
142 | logprobs = torch.zeros(batch_size, current_beam_size, dtype=torch.double).to(device)
143 | lengths = torch.zeros(batch_size * current_beam_size, dtype=torch.int).to(device)
144 | # generate tokens step by step
145 | for step_idx in range(self.maxlen):
146 |
147 | # generation mask
148 | src_mask = self.generate_square_subsequent_mask(src.size(0)).to(device)
149 | # positional encoding
150 | src_pe = self.positional_encoding(src)
151 | # transformer output
152 | transformer_output = self.transformer_encoder(
153 | src_pe, mask=src_mask, src_key_padding_mask=src_key_padding_mask.t()
154 | )[-1]
155 | # distribution over the full vocabulary
156 | v_dist = self.v_proj(transformer_output)
157 | # don't generate padding tokens
158 | v_dist[...,self.padding_idx] = -float("inf")
159 | v_dist = F.log_softmax(v_dist, dim=-1)
160 |
161 | # for each beam, select the best candidate continuations
162 | new_logprobs, new_symbols = v_dist.topk(beam_size, dim=-1)
163 | # patch the output scores to zero-out items that have already stopped
164 | new_logprobs = new_logprobs.masked_fill(has_stopped.unsqueeze(-1), 0.0)
165 | # if the beam hasn't stopped, then it needs to produce at least an EOS
166 | # so we can just add one to beams that have not stopped to account for the current token
167 | lengths += (~has_stopped).int()
168 |
169 | # compute scores for each continuation
170 | ## recreate the score of the previous full sequence for all possible continuations
171 | logprobs_ = logprobs.view(batch_size * current_beam_size, 1).expand(batch_size * current_beam_size, beam_size)
172 | ## add the cost of each continuation
173 | logprobs_ = logprobs_ + new_logprobs
174 | ## keep the summed log-probability; length normalization is only applied at the final beam selection
175 | avg_logprobs = logprobs_ #/ lengths.unsqueeze(-1)
176 | ## select the `beam_size` best continuations overall, their matching scores will be `avg_logprobs`
177 | avg_logprobs, selected_beams = avg_logprobs.view(batch_size, current_beam_size * beam_size).topk(beam_size, dim=-1)
178 | ## select back the base score for the selected continuations
179 | logprobs = logprobs_.view(batch_size, current_beam_size * beam_size).gather(-1, selected_beams).view(batch_size, beam_size)
180 |
181 | # add symbols of best continuations
182 | ## recreate the full previous sequence for all possible continuations
183 | generated_symbols_ = generated_symbols.view(-1, batch_size * current_beam_size, 1).expand(-1, batch_size * current_beam_size, beam_size)
184 | ## stack on the new symbols
185 | generated_symbols_ = torch.cat([generated_symbols_, new_symbols.unsqueeze(0)], dim=0)
186 | ## grab only the `beam_size` best continuations out of all possible continuations
187 | generated_symbols_ = generated_symbols_.view(-1, batch_size, current_beam_size * beam_size)
188 | generated_symbols = generated_symbols_.gather(-1, selected_beams.unsqueeze(0).expand(step_idx + 1, batch_size, beam_size)).view(step_idx + 1, batch_size, beam_size)
189 |
190 | # recompute which beams have stopped, and what their lengths are
191 | ## reconstruct the lengths of all candidate continuations
192 | lengths = lengths.view(batch_size, current_beam_size, 1).expand(batch_size, current_beam_size, beam_size)
193 | ## retrieve the lengths of the selected beam continuations
194 | lengths = lengths.reshape(batch_size, current_beam_size * beam_size).gather(-1, selected_beams).view(-1)
195 | ## reconstruct the halting state of all candidate continuations
196 | has_stopped = has_stopped.view(batch_size, current_beam_size, 1).expand(batch_size, current_beam_size, beam_size)
197 | ## retrieve the halting states of selected beam continuations
198 | has_stopped = has_stopped.reshape(batch_size, current_beam_size * beam_size).gather(-1, selected_beams).view(-1)
199 |
200 | # flag which beams have terminated at the current step (i.e., whether they just produced an EOS)
201 | generated_symbols = generated_symbols.view(-1, batch_size * beam_size)
202 | generated_symbols[-1] = generated_symbols[-1].masked_fill(has_stopped, self.padding_idx)
203 | has_stopped = has_stopped | (generated_symbols.view(-1, batch_size * beam_size)[-1] == self.eos_idx).view(batch_size * beam_size)
204 |
205 | # recompute padding mask on the basis of which continuations were selected
206 | src_key_padding_mask = src_key_padding_mask.view(-1, batch_size, current_beam_size, 1).expand(-1, batch_size, current_beam_size, beam_size)
207 | src_key_padding_mask = src_key_padding_mask.reshape(-1, batch_size, current_beam_size * beam_size)
208 | src_key_padding_mask = src_key_padding_mask.gather(-1, selected_beams.unsqueeze(0).expand(step_idx + 1, batch_size, beam_size)).view(step_idx + 1, batch_size * beam_size)
209 | src_key_padding_mask = torch.cat([src_key_padding_mask, has_stopped.unsqueeze(0)], dim=0)
210 |
211 | # produce input for the next timestep
212 | src = torch.cat([vector_src.expand(1, beam_size, -1), self.embedding(generated_symbols)], dim=0)
213 | # reshape to the familiar format
214 | generated_symbols = generated_symbols.view(-1, batch_size, beam_size)
215 |
216 | # if all beams have stopped, so do we
217 | if has_stopped.all():
218 | break
219 | # we update the number of sustained beams after the first iteration, since we now have `beam_size` candidates.
220 | current_beam_size = beam_size
221 |
222 | # select the most likely sequence for each batched item
223 | max_scores, selected_beams = (logprobs / lengths.view(batch_size, beam_size)).topk(1, dim=1)
224 | output_sequence = generated_symbols.gather(1, selected_beams.unsqueeze(0).expand(step_idx + 1, batch_size, 1))
225 | if verbose: print(decode_fn(output_sequence.squeeze(-1)))
226 | return output_sequence.squeeze(-1)
227 |
228 |
229 | class RevdictModel(nn.Module):
230 | """A transformer architecture for Definition Modeling."""
231 |
232 | def __init__(
233 | self, vocab, d_model=256, n_head=4, n_layers=4, dropout=0.3, maxlen=512
234 | ):
235 | super(RevdictModel, self).__init__()
236 | self.d_model = d_model
237 | self.padding_idx = vocab[data.PAD]
238 | self.eos_idx = vocab[data.EOS]
239 | self.maxlen = maxlen
240 |
241 | self.embedding = nn.Embedding(len(vocab), d_model, padding_idx=self.padding_idx)
242 | self.positional_encoding = PositionalEncoding(
243 | d_model, dropout=dropout, max_len=maxlen
244 | )
245 | encoder_layer = nn.TransformerEncoderLayer(
246 | d_model=d_model, nhead=n_head, dropout=dropout, dim_feedforward=d_model * 2
247 | )
248 | self.transformer_encoder = nn.TransformerEncoder(
249 | encoder_layer, num_layers=n_layers
250 | )
251 | self.dropout = nn.Dropout(p=dropout)
252 | self.e_proj = nn.Linear(d_model, d_model)
253 | for name, param in self.named_parameters():
254 | if param.dim() > 1:
255 | nn.init.xavier_uniform_(param)
256 | elif "bias" in name:
257 | nn.init.zeros_(param)
258 | else: # gain parameters of the layer norm
259 | nn.init.ones_(param)
260 |
261 | def forward(self, gloss_tensor):
262 | src_key_padding_mask = gloss_tensor == self.padding_idx
263 | embs = self.embedding(gloss_tensor)
264 | src = self.positional_encoding(embs)
265 | transformer_output = self.dropout(
266 | self.transformer_encoder(src, src_key_padding_mask=src_key_padding_mask.t())
267 | )
268 | summed_embs = transformer_output.masked_fill(
269 | src_key_padding_mask.unsqueeze(-1), 0
270 | ).sum(dim=0)
271 | return self.e_proj(F.relu(summed_embs))
272 |
273 | @staticmethod
274 | def load(file):
275 | return torch.load(file)
276 |
277 | def save(self, file):
278 | torch.save(self, file)
279 |
280 |
281 | def linear_combination(x, y, epsilon):
282 | return epsilon * x + (1 - epsilon) * y
283 |
284 |
285 | def reduce_loss(loss, reduction="mean"):
286 | return (
287 | loss.mean()
288 | if reduction == "mean"
289 | else loss.sum()
290 | if reduction == "sum"
291 | else loss
292 | )
293 |
294 |
295 | # Implementation of Label smoothing with CrossEntropy and ignore_index
296 | class LabelSmoothingCrossEntropy(nn.Module):
297 | def __init__(self, epsilon: float = 0.1, reduction="mean", ignore_index=-100):
298 | super().__init__()
299 | self.epsilon = epsilon
300 | self.reduction = reduction
301 | self.ignore_index = ignore_index
302 |
303 | def forward(self, preds, target):
304 | n = preds.size()[-1]
305 | log_preds = F.log_softmax(preds, dim=-1)
306 | loss = reduce_loss(-log_preds.sum(dim=-1), self.reduction)
307 | nll = F.nll_loss(
308 | log_preds, target, reduction=self.reduction, ignore_index=self.ignore_index
309 | )
310 | return linear_combination(loss / n, nll, self.epsilon)
311 |
--------------------------------------------------------------------------------
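As a reading aid for the loss above: with smoothing weight ε, vocabulary size n, predicted distribution p, and gold token y, `LabelSmoothingCrossEntropy.forward` computes the standard label-smoothed cross-entropy, restated here in LaTeX:

```latex
% LabelSmoothingCrossEntropy.forward, restated:
%   nll      = -\log p_y              (the F.nll_loss term)
%   loss / n = \frac{1}{n}\sum_i -\log p_i   (the -log_preds.sum(dim=-1) term)
\mathcal{L} = (1 - \epsilon)\,(-\log p_y) + \frac{\epsilon}{n}\sum_{i=1}^{n}(-\log p_i)
% i.e. the cross-entropy against the smoothed target distribution
% q_i = (1 - \epsilon)\,\mathbf{1}[i = y] + \epsilon / n
```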
/baseline_archs/code/defmod.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import json
4 | import logging
5 | import pathlib
6 | import pprint
7 | import secrets
8 |
9 | import skopt
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | import torch.optim as optim
15 | from torch.utils.tensorboard import SummaryWriter
16 |
17 | import tqdm
18 |
19 | import data
20 | import models
21 |
22 | logger = logging.getLogger(pathlib.Path(__file__).name)
23 | logger.setLevel(logging.DEBUG)
24 | handler = logging.StreamHandler(tqdm.tqdm)
25 | handler.terminator = ""
26 | handler.setFormatter(
27 | logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
28 | )
29 | logger.addHandler(handler)
30 |
31 |
32 | def get_parser(
33 | parser=argparse.ArgumentParser(description="run a definition modeling baseline"),
34 | ):
35 | parser.add_argument(
36 | "--do_htune",
37 | action="store_true",
38 | help="whether to perform hyperparameter tuning",
39 | )
40 | parser.add_argument(
41 | "--do_train", action="store_true", help="whether to train a model from scratch"
42 | )
43 | parser.add_argument(
44 | "--do_pred", action="store_true", help="whether to produce predictions"
45 | )
46 | parser.add_argument(
47 | "--train_file", type=pathlib.Path, help="path to the train file"
48 | )
49 | parser.add_argument("--dev_file", type=pathlib.Path, help="path to the dev file")
50 | parser.add_argument("--test_file", type=pathlib.Path, help="path to the test file")
51 | parser.add_argument(
52 | "--device",
53 | type=torch.device,
54 | default=torch.device("cpu"),
55 | help="path to the train file",
56 | )
57 | parser.add_argument(
58 | "--source_arch",
59 | type=str,
60 | default="sgns",
61 | choices=("sgns", "char", "electra"),
62 | help="embedding architecture to use as source",
63 | )
64 | parser.add_argument(
65 | "--summary_logdir",
66 | type=pathlib.Path,
67 | default=pathlib.Path("logs") / "defmod-baseline",
68 | help="write logs for future analysis",
69 | )
70 | parser.add_argument(
71 | "--save_dir",
72 | type=pathlib.Path,
73 | default=pathlib.Path("models") / "defmod-baseline",
74 | help="where to save model & vocab",
75 | )
76 | parser.add_argument(
77 | "--spm_model_path",
78 | type=pathlib.Path,
79 | default=None,
80 | help="use sentencepiece model, if required train and save it here",
81 | )
82 | parser.add_argument(
83 | "--pred_file",
84 | type=pathlib.Path,
85 | default=pathlib.Path("defmod-baseline-preds.json"),
86 | help="where to save predictions",
87 | )
88 | return parser
89 |
90 |
91 | def get_search_space():
92 | """get hyperparmeters to optimize for"""
93 | search_space = [
94 | skopt.space.Real(1e-8, 1.0, "log-uniform", name="learning_rate"),
95 | skopt.space.Real(0.0, 1.0, "uniform", name="weight_decay"),
96 | skopt.space.Real(0.9, 1.0 - 1e-8, "log-uniform", name="beta_a"),
97 | skopt.space.Real(0.9, 1.0 - 1e-8, "log-uniform", name="beta_b"),
98 | skopt.space.Real(0.0, 0.9, "uniform", name="dropout"),
99 | skopt.space.Real(0.0, 1.0, "uniform", name="warmup_len"),
100 | skopt.space.Real(0.0, 1.0 - 1e-8, "uniform", name="label_smoothing"),
101 | skopt.space.Integer(1, 100, "log-uniform", name="batch_accum"),
102 | skopt.space.Integer(0, 5, "uniform", name="n_head_pow"),
103 | skopt.space.Integer(1, 6, "uniform", name="n_layers"),
104 | ]
105 | return search_space
106 |
107 |
108 | def train(
109 | train_file,
110 | dev_file,
111 | source_arch="sgns",
112 | summary_logdir=pathlib.Path("logs") / "defmod-htune",
113 | save_dir=pathlib.Path("models") / "defmod-baseline",
114 | device="cuda:0",
115 | spm_model_path=None,
116 | epochs=100,
117 | learning_rate=1e-4,
118 | beta1=0.9,
119 | beta2=0.999,
120 | weight_decay=1e-6,
121 | patience=5,
122 | batch_accum=1,
123 | dropout=0.3,
124 | warmup_len=0.1,
125 | label_smoothing=0.1,
126 | n_head=4,
127 | n_layers=4,
128 | ):
129 | assert train_file is not None, "Missing dataset for training"
130 | assert dev_file is not None, "Missing dataset for development"
131 |
132 | # 1. get data, vocabulary, summary writer
133 | logger.debug("Preloading data")
134 | save_dir = save_dir / source_arch
135 | save_dir.mkdir(parents=True, exist_ok=True)
136 | ## make datasets
137 | train_dataset = data.get_train_dataset(train_file, spm_model_path, save_dir)
138 | dev_dataset = data.get_dev_dataset(
139 | dev_file, spm_model_path, save_dir, train_dataset
140 | )
141 | ## assert they correspond to the task
142 | assert train_dataset.has_gloss, "Training dataset contains no gloss."
143 | if source_arch == "electra":
144 | assert train_dataset.has_electra, "Training dataset contains no vector."
145 | else:
146 | assert train_dataset.has_vecs, "Training dataset contains no vector."
147 | assert dev_dataset.has_gloss, "Development dataset contains no gloss."
148 | if source_arch == "electra":
149 | assert dev_dataset.has_electra, "Development dataset contains no vector."
150 | else:
151 | assert dev_dataset.has_vecs, "Development dataset contains no vector."
152 | ## make dataloader
153 | train_dataloader = data.get_dataloader(train_dataset)
154 | dev_dataloader = data.get_dataloader(dev_dataset, shuffle=False)
155 | ## make summary writer
156 | summary_writer = SummaryWriter(summary_logdir)
157 | train_step = itertools.count() # to keep track of the training steps for logging
158 |
159 | # 2. construct model
160 | logger.debug("Setting up training environment")
161 | model = models.DefmodModel(
162 | dev_dataset.vocab, n_head=n_head, n_layers=n_layers, dropout=dropout
163 | )
164 | model = model.to(device)
165 | model.train()
166 |
167 | # 3. declare optimizer & criterion
168 | ## Hyperparams
169 | optimizer = optim.AdamW(
170 | model.parameters(),
171 | lr=learning_rate,
172 | betas=(beta1, beta2),
173 | weight_decay=weight_decay,
174 | )
175 | xent_criterion = nn.CrossEntropyLoss(ignore_index=model.padding_idx)
176 | if label_smoothing > 0.0:
177 | smooth_criterion = models.LabelSmoothingCrossEntropy(
178 | ignore_index=model.padding_idx, epsilon=label_smoothing
179 | )
180 | else:
181 | smooth_criterion = xent_criterion
182 |
183 | vec_tensor_key = f"{source_arch}_tensor"
184 | best_xent = float("inf")
185 | strikes = 0
186 |
187 | # 4. train model
188 | epochs_range = tqdm.trange(epochs, desc="Epochs")
189 | total_steps = (len(train_dataloader) * epochs) // batch_accum
190 | scheduler = models.get_schedule(
191 | optimizer, round(total_steps * warmup_len), total_steps
192 | )
193 | for epoch in epochs_range:
194 | ## train loop
195 | pbar = tqdm.tqdm(
196 | desc=f"Train {epoch}", total=len(train_dataset), disable=None, leave=False
197 | )
198 | optimizer.zero_grad()
199 | for i, batch in enumerate(train_dataloader):
200 | vec = batch[vec_tensor_key].to(device)
201 | gls = batch["gloss_tensor"].to(device)
202 | pred = model(vec, gls[:-1])
203 | loss = smooth_criterion(pred.view(-1, pred.size(-1)), gls.view(-1))
204 | loss.backward()
205 | grad_remains = True
206 | step = next(train_step)
207 | if i % batch_accum == 0:
208 | optimizer.step()
209 | scheduler.step()
210 | optimizer.zero_grad()
211 | grad_remains = False
212 | summary_writer.add_scalar(
213 | "defmod-train/lr", scheduler.get_last_lr()[0], step
214 | )
215 | # keep track of the train loss for this step
216 | with torch.no_grad():
217 | tokens = gls != model.padding_idx
218 | acc = (
219 | ((pred.argmax(-1) == gls) & tokens).float().sum() / tokens.sum()
220 | ).item()
221 | xent_unsmoothed = xent_criterion(
222 | pred.view(-1, pred.size(-1)), gls.view(-1)
223 | )
224 | summary_writer.add_scalar("defmod-train/xent_smooth", loss.item(), step)
225 | summary_writer.add_scalar("defmod-train/xent", xent_unsmoothed, step)
226 | summary_writer.add_scalar("defmod-train/acc", acc, step)
227 | pbar.update(vec.size(0))
228 | if grad_remains:
229 | optimizer.step()
230 | scheduler.step()
231 | optimizer.zero_grad()
232 | pbar.close()
233 | ## eval loop
234 | model.eval()
235 | with torch.no_grad():
236 | sum_dev_loss = 0.0
237 | sum_acc = 0
238 | ntoks = 0
239 | pbar = tqdm.tqdm(
240 | desc=f"Eval {epoch}",
241 | total=len(dev_dataset),
242 | disable=None,
243 | leave=False,
244 | )
245 | for batch in dev_dataloader:
246 | vec = batch[vec_tensor_key].to(device)
247 | gls = batch["gloss_tensor"].to(device)
248 | pred = model(vec, gls[:-1])
249 | sum_dev_loss += F.cross_entropy(
250 | pred.view(-1, pred.size(-1)),
251 | gls.view(-1),
252 | reduction="sum",
253 | ignore_index=model.padding_idx,
254 | ).item()
255 | tokens = gls != model.padding_idx
256 | ntoks += tokens.sum().item()
257 | sum_acc += ((pred.argmax(-1) == gls) & tokens).sum().item()
258 | pbar.update(vec.size(0))
259 |
260 | # keep track of the average loss & acc on dev set for this epoch
261 | new_xent = sum_dev_loss / ntoks
262 | summary_writer.add_scalar("defmod-dev/xent", new_xent, epoch)
263 | summary_writer.add_scalar("defmod-dev/acc", sum_acc / ntoks, epoch)
264 | pbar.close()
265 | if new_xent < (best_xent * 0.999):
266 | logger.debug(
267 | f"Epoch {epoch}, new best loss: {new_xent:.4f} < {best_xent:.4f}"
268 | + f" (x 0.999 = {best_xent * 0.999:.4f})"
269 | )
270 | best_xent = new_xent
271 | strikes = 0
272 | else:
273 | strikes += 1
274 | # retrieve the overall best score recorded so far
275 | if not (save_dir / "best_scores.txt").is_file():
276 | overall_best_xent = float("inf")
277 | else:
278 | with open(save_dir / "best_scores.txt", "r") as score_file:
279 | overall_best_xent = float(score_file.read())
280 | # save result if better
281 | if new_xent < overall_best_xent:
282 | logger.debug(
283 | f"Epoch {epoch}, new overall best loss: {new_xent:.4f} < {overall_best_xent:.4f}"
284 | )
285 | model.save(save_dir / "model.pt")
286 | with open(save_dir / "hparams.json", "w") as json_file:
287 | hparams = {
288 | "learning_rate": learning_rate,
289 | "beta1": beta1,
290 | "beta2": beta2,
291 | "weight_decay": weight_decay,
292 | }
293 | json.dump(hparams, json_file, indent=2)
294 | with open(save_dir / "best_scores.txt", "w") as score_file:
295 | print(new_xent, file=score_file)
296 |
297 | if strikes >= patience:
298 | logger.debug("Stopping early.")
299 | epochs_range.close()
300 | break
301 | model.train()
302 | # return loss for gp minimize
303 | return best_xent
304 |
305 |
306 | def pred(args):
307 | assert args.test_file is not None, "Missing dataset for test"
308 | # 1. retrieve vocab, dataset, model
309 | model = models.DefmodModel.load(args.save_dir / "model.pt")
310 | train_vocab = data.JSONDataset.load(args.save_dir / "train_dataset.pt").vocab
311 | test_dataset = data.JSONDataset(
312 | args.test_file, vocab=train_vocab, freeze_vocab=True, maxlen=model.maxlen, spm_model_name=args.spm_model_path
313 | )
314 | test_dataloader = data.get_dataloader(test_dataset, shuffle=False, batch_size=1)
315 | model.eval()
316 | vec_tensor_key = f"{args.source_arch}_tensor"
317 | if args.source_arch == "electra":
318 | assert test_dataset.has_electra, "File is not usable for the task"
319 | else:
320 | assert test_dataset.has_vecs, "File is not usable for the task"
321 | # 2. make predictions
322 | predictions = []
323 | with torch.no_grad():
324 | pbar = tqdm.tqdm(desc="Pred.", total=len(test_dataset), disable=None)
325 | for batch in test_dataloader:
326 | sequence = model.pred(batch[vec_tensor_key].to(args.device), decode_fn=test_dataset.decode, verbose=False)
327 | for id, gloss in zip(batch["id"], test_dataset.decode(sequence)):
328 | predictions.append({"id": id, "gloss": gloss})
329 | pbar.update(batch[vec_tensor_key].size(0))
330 | pbar.close()
331 | # 3. dump predictions
332 | with open(args.pred_file, "w") as ostr:
333 | json.dump(predictions, ostr)
334 |
335 |
336 | def main(args):
337 | assert not (args.do_train and args.do_htune), "Conflicting options"
338 | if args.do_train:
339 | logger.debug("Performing defmod training")
340 | train(
341 | args.train_file,
342 | args.dev_file,
343 | args.source_arch,
344 | args.summary_logdir,
345 | args.save_dir,
346 | args.device,
347 | )
348 | elif args.do_htune:
349 | logger.debug("Performing defmod hyperparameter tuning")
350 | search_space = get_search_space()
351 |
352 | @skopt.utils.use_named_args(search_space)
353 | def gp_train(**hparams):
354 | logger.debug(f"Hyperparams sampled:\n{pprint.pformat(hparams)}")
355 | best_loss = train(
356 | train_file=args.train_file,
357 | dev_file=args.dev_file,
358 | source_arch=args.source_arch,
359 | summary_logdir=args.summary_logdir
360 | / args.source_arch
361 | / secrets.token_urlsafe(8),
362 | save_dir=args.save_dir,
363 | device=args.device,
364 | spm_model_path=args.spm_model_path,
365 | learning_rate=hparams["learning_rate"],
366 | beta1=min(hparams["beta_a"], hparams["beta_b"]),
367 | beta2=max(hparams["beta_a"], hparams["beta_b"]),
368 | weight_decay=hparams["weight_decay"],
369 | batch_accum=hparams["batch_accum"],
370 | warmup_len=hparams["warmup_len"],
371 | label_smoothing=hparams["label_smoothing"],
372 | n_head=2 ** hparams["n_head_pow"],
373 | n_layers=hparams["n_layers"],
374 | )
375 | return best_loss
376 |
377 | result = skopt.gp_minimize(gp_train, search_space)
378 | args.save_dir = args.save_dir / args.source_arch
379 | skopt.dump(result, args.save_dir / "results.pkl", store_objective=False)
380 |
381 | if args.do_pred:
382 | logger.debug("Performing defmod prediction")
383 | pred(args)
384 |
385 |
386 | if __name__ == "__main__":
387 | args = get_parser().parse_args()
388 | main(args)
389 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Comparing Dictionaries and Word Embeddings
2 |
3 | This is the repository for the SemEval 2022 Shared Task #1: Comparing
4 | Dictionaries and Word Embeddings (CODWOE).
5 |
6 | This repository currently contains: the configuration for the codalab
7 | competition, a Docker image to reproduce the environment, a scorer, a
8 | format-checker and baseline programs to help participants get started.
9 |
10 | Participants may be interested in the script `codwoe_entrypoint.py`. It contains
11 | a number of useful features, such as a scorer, a format checker and a
12 | few simple baseline architectures. It is also an exact copy of what is used on
13 | the codalab.
14 |
15 | **Datasets are no longer provided directly on this repository. The competition datasets are now available on this page: [https://codwoe.atilf.fr/](https://codwoe.atilf.fr/).**
16 |
17 | # What is this task?
18 | The CODWOE shared task invites you to compare two types of semantic
19 | descriptions: dictionary glosses and word embedding representations. Are these
20 | two types of representation equivalent? Can we generate one from the other? To
21 | study this question, we propose two subtracks: a **definition modeling** track
22 | (Noraset et al., 2017), where participants have to generate glosses from
23 | vectors, and a **reverse dictionary** track (Hill et al., 2016), where
24 | participants have to generate vectors from glosses.
25 |
26 | These two tracks display a number of interesting characteristics. Definition
27 | modeling is a vector-to-sequence task; the reverse dictionary task is a
28 | sequence-to-vector task—and you know that kind of thing gets NLP people swearing
29 | out loud. These tasks are also useful for explainable AI, since they involve
30 | converting human-readable data into machine-readable data and back.
31 |
32 | To get involved: check out the
33 | [codalab competition](https://competitions.codalab.org/competitions/34022).
34 | There is also a participants'
35 | ["semeval2022-dictionaries-and-word-embeddings" google group](mailto:semeval2022-dictionaries-and-word-embeddings@googlegroups.com),
36 | as well as a [discord server](https://discord.gg/y8g6qXakNs).
37 | You can reach the organizers through [this email](mailto:tmickus@atilf.fr); make
38 | sure to mention SemEval in your email subject.
39 |
40 | # How hard is it?
41 |
42 | ## Official rankings
43 |
44 | Below are the official rankings for the SemEval 2022 CODWOE Shared task.
45 | More information about the submissions we received is available in this repository (see the `rankings/` sub-directory).
46 |
47 | ### Definition Modeling track
48 |
49 | Below are the results for the Definition Modeling track.
50 |
51 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
52 | |---------------|--------:|--------:|--------:|--------:|--------:
53 | | Locchi | 8 | 6 | | 7 |
54 | | LingJing | 9 | 7 | 6 | 6 | 6
55 | | BLCU-ICALL | 3 | 2 | 3 | **1** | 2
56 | | IRB-NLP | 2 | **1** | **1** | 5 | 5
57 | | emukans | 5 | 4 | 4 | 4 | 3
58 | | guntis | 6 | | | |
59 | | lukechan1231 | 7 | 5 | 5 | 3 | 4
60 | | pzchen | 4 | 3 | 2 | 2 | **1**
61 | | talent404 | **1** | | | |
62 |
63 | ### Reverse Dictionary track
64 |
65 | Below are the results for the Reverse Dictionary track.
66 | There are separate rankings, depending on which targets participants submitted.
67 |
68 |
69 | #### A. SGNS targets
70 |
71 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
72 | |------------------|--------:|--------:|--------:|--------:|--------:
73 | | Locchi | 4 | | | 4 |
74 | | BL.Research | 5 | 5 | 4 | 6 | 4
75 | | LingJing | **1** | 2 | 2 | 3 | **1**
76 | | MMG | | 3 | | |
77 | | chlrbgus321 | N/A | | | |
78 | | IRB-NLP | 3 | **1** | **1** | **1** | 2
79 | | pzchen | 2 | 4 | 3 | 2 | 3
80 | | the0ne | 7 | | | |
81 | | JSI | 8 | 7 | 6 | 7 | 6
82 | | zhwa3087 | 6 | 6 | 5 | 5 | 5
83 |
84 | #### B. ELECTRA targets
85 |
86 | | user / team | Rank EN | Rank FR | Rank RU
87 | |------------------|--------:|--------:|--------:
88 | | Locchi | 3 | |
89 | | BL.Research | 2 | 2 | 4
90 | | LingJing | 4 | 4 | 2
91 | | IRB-NLP | 5 | 3 | 3
92 | | pzchen | **1** | **1** | **1**
93 | | the0ne | 6 | |
94 |
95 |
96 | #### C. Char-based targets
97 |
98 | | user / team | Rank EN | Rank ES | Rank FR | Rank IT | Rank RU
99 | |------------------|--------:|--------:|--------:|--------:|--------:
100 | | Locchi | **1** | | | 4 |
101 | | BL.Research | 2 | 2 | 2 | 3 | 4
102 | | LingJing | 7 | 5 | 5 | 6 | 5
103 | | IRB-NLP | 4 | 3 | 4 | 2 | 2
104 | | pzchen | 3 | **1** | **1** | **1** | **1**
105 | | the0ne | 5 | | | |
106 | | zhwa3087 | 6 | 4 | 3 | 5 | 3
107 |
108 |
109 |
110 | ## Baseline results
111 | Here are baseline results on the development set for the two tracks.
112 | We used the code described in `baseline_archs/code` to generate these scores.
113 |
114 | For the Reverse Dictionary track results, rows correspond to different targets.
115 | Rows of the Definition Modeling table below, on the other hand, correspond to different inputs to the system.
116 | Scores were computed using the scoring script provided in this git (`code/score.py`).
117 |
118 | ### Reverse Dictionary track
119 |
120 | | | MSE | Cosine | Ranking
121 | |------------|--------:|--------:|--------:
122 | | en SGNS | 0.91092 | 0.15132 | 0.49030
123 | | en char | 0.14776 | 0.79006 | 0.50218
124 | | en electra | 1.41287 | 0.84283 | 0.49849
125 | | es SGNS | 0.92996 | 0.20406 | 0.49912
126 | | es char | 0.56952 | 0.80634 | 0.49778
127 | | fr SGNS | 1.14050 | 0.19774 | 0.49052
128 | | fr char | 0.39480 | 0.75852 | 0.49945
129 | | fr electra | 1.15348 | 0.85629 | 0.49784
130 | | it SGNS | 1.12536 | 0.20430 | 0.47692
131 | | it char | 0.36309 | 0.72732 | 0.49663
132 | | ru SGNS | 0.57683 | 0.25316 | 0.49008
133 | | ru char | 0.13498 | 0.82624 | 0.49451
134 | | ru electra | 0.87358 | 0.72086 | 0.49120
135 |
136 |
137 | ### Definition Modeling track
138 |
139 | | | Sense-BLEU | Lemma-BLEU | MoverScore
140 | |------------|-----------:|-----------:|-----------:
141 | | en SGNS | 0.03048 | 0.04062 | 0.08307
142 | | en char | 0.02630 | 0.03359 | 0.04531
143 | | en electra | 0.03155 | 0.04155 | 0.06732
144 | | es SGNS | 0.03528 | 0.05273 | 0.06685
145 | | es char | 0.03291 | 0.04712 | 0.06112
146 | | fr SGNS | 0.02983 | 0.04134 | 0.04036
147 | | fr char | 0.02913 | 0.03985 | 0.01935
148 | | fr electra | 0.03061 | 0.03954 | 0.03855
149 | | it SGNS | 0.04759 | 0.06910 | 0.10154
150 | | it char | 0.02532 | 0.03522 | 0.04068
151 | | ru SGNS | 0.03805 | 0.05121 | 0.11559
152 | | ru char | 0.02324 | 0.03238 | 0.07145
153 | | ru electra | 0.02987 | 0.03782 | 0.10382
154 |
155 | # Using this repository
156 | To install the exact environment used for our scripts, see the
157 | `requirements.txt` file, which lists the libraries we used. Do
158 | note that the exact installation used in the competition underwent supplementary
159 | tweaks: in particular, we patched the moverscore library to have it run on CPU.
160 |
161 | Another possibility is to use the dockerfile written for the codalab
162 | competition. You can also pull this docker image from dockerhub:
163 | [`linguistickus/codwoe`](https://hub.docker.com/r/linguistickus/codwoe). This
164 | Docker image doesn't contain the code, so you will also need to clone the
165 | repository within it; the image does, however, include our tweaks.
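166 | 
167 | For instance (a minimal sketch; the clone URL assumes the GitHub copy of this repository):
168 | ```sh
169 | $ docker pull linguistickus/codwoe
170 | $ docker run -it linguistickus/codwoe bash
171 | # then, from inside the container:
172 | $ git clone https://github.com/TimotheeMickus/codwoe.git
173 | ```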
166 |
167 | Code useful to participants is stored in the `code/` directory.
168 | To see the options for a simple baseline on the definition modeling track, use:
169 | ```sh
170 | $ python3 code/codwoe_entrypoint.py defmod --help
171 | ```
172 | To see options for a simple baseline on the reverse dictionary track, use:
173 | ```sh
174 | $ python3 code/codwoe_entrypoint.py revdict --help
175 | ```
176 | To verify the format of a submission, run:
177 | ```sh
178 | $ python3 code/codwoe_entrypoint.py check-format $PATH_TO_SUBMISSION_FILE
179 | ```
180 | To score a submission, use:
181 | ```sh
182 | $ python3 code/codwoe_entrypoint.py score $PATH_TO_SUBMISSION_FILE --reference_files_dir $PATH_TO_DATA_DIR
183 | ```
184 | Note that this requires the gold files, not available at the start of the
185 | competition.
186 |
187 | Other useful files to look at include `code/models.py`, where our baseline
188 | architectures are defined, and `code/data.py`, which shows how to use the JSON
189 | datasets with the PyTorch dataset API.
190 |
191 | # Using the datasets
192 |
193 | **Datasets are no longer provided directly on this repository. The competition datasets are now available on this page: [https://codwoe.atilf.fr/](https://codwoe.atilf.fr/).**
194 |
195 | This section details the structure of the JSON dataset files we provide. More information is available on the competition website: [link](https://competitions.codalab.org/competitions/34022#participate-get_data).
196 |
197 | ### Brief Overview
198 |
199 | As an overview, the expected usage of the datasets is as follows:
200 | + In the Definition Modeling track, we expect participants to use the embeddings ("char", "sgns", "electra") to generate the associated definition ("gloss").
201 | + In the Reverse Dictionary track, we expect participants to use the definition ("gloss") to generate any of the associated embeddings ("char", "sgns", "electra").
202 |
203 |
204 | ### Dataset files structure
205 |
206 | Each dataset file corresponds to a data split (trial/train/dev/test) for one of the languages.
207 |
208 | Dataset files are in the JSON format. A dataset file contains a list of examples. Each example is a JSON dictionary, containing the following keys:
209 | + "id",
210 | + "gloss"
211 | + "sgns"
212 | + "char"
213 |
214 | The English, French and Russian datasets also contain an "electra" key.
215 |
216 | As a concrete instance, here is an example from the English training dataset:
217 | ```json
218 | {
219 | "id": "en.train.2",
220 | "gloss": "A vocal genre in Hindustani classical music",
221 | "sgns": [
222 | -0.0602365807,
223 | ...
224 | ],
225 | "char": [
226 | -0.3631578386,
227 | ...
228 | ],
229 | "electra": [
230 | -1.3904430866,
231 | ...
232 | ]
233 | },
234 | ```
235 |
236 | ### Description of contents
237 |
238 | The value associated with the "id" key encodes the language, the data split, and a unique identifier for this example.
239 |
240 | The value associated with the "gloss" key is a definition, as you would find in a classical dictionary. It is to be used either as the target in the Definition Modeling track, or as the source in the Reverse Dictionary track.
241 |
242 | All other keys ("char", "sgns", "electra") correspond to embeddings, and the associated values are arrays of floats representing their components. They can all serve as targets for the Reverse Dictionary track.
243 | + "char" corresponds to character-based embeddings, computed using an auto-encoder on the spelling of a word.
244 | + "sgns" corresponds to skip-gram with negative sampling embeddings (a.k.a. word2vec).
245 | + "electra" corresponds to Transformer-based contextualized embeddings.
246 |
247 |
248 | ### Using the dataset files
249 |
250 | Given that the data is in JSON format, it is straightforward to load in Python:
251 |
252 | ```python
253 | import json
254 | with open(PATH_TO_DATASET, "r") as file_handler:
255 | dataset = json.load(file_handler)
256 | ```
257 |
258 | A more complete example for PyTorch is available in the git repository (see here: [link](https://git.atilf.fr/tmickus/codwoe/-/blob/master/code/data.py#L18)).
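259 | 
260 | If you want a self-contained starting point, below is a minimal sketch of wrapping a dataset file with the PyTorch dataset API. The class name and the values returned are our own illustrative choices, not the interface of `code/data.py`:
261 | 
262 | ```python
263 | import json
264 | 
265 | import torch
266 | from torch.utils.data import Dataset
267 | 
268 | 
269 | class CodwoeJSONDataset(Dataset):
270 |     """Bare-bones wrapper around a single CODWOE JSON dataset file."""
271 | 
272 |     def __init__(self, path, arch="sgns"):
273 |         # `arch` selects which embedding to expose: "char", "sgns" or "electra"
274 |         with open(path, "r") as file_handler:
275 |             self.items = json.load(file_handler)
276 |         self.arch = arch
277 | 
278 |     def __len__(self):
279 |         return len(self.items)
280 | 
281 |     def __getitem__(self, idx):
282 |         item = self.items[idx]
283 |         # glosses are left as raw strings; tokenization is up to you
284 |         return item["id"], item["gloss"], torch.tensor(item[self.arch])
285 | ```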
259 |
260 | ### Expected output format
261 |
262 | During the evaluation phase, we will expect submissions to reconstruct the same JSON format.
263 |
264 | The test JSON files used as input will be separate for each track. They will contain the "id" key, and either the "gloss" key (in the reverse dictionary track) or the embedding keys (the "char" and "sgns" keys, plus an "electra" key in EN/FR/RU, in the definition modeling track).
265 |
266 | In the definition modeling track, participants should construct JSON files that contain at least the two following keys:
267 | + the original "id"
268 | + their generated "gloss"
269 |
270 | In the reverse dictionary track, participants should construct JSON files that contain at least the two following keys:
271 | + the original "id"
272 | + any of the valid embeddings (the "char", "sgns", or "electra" key; "electra" only in EN/FR/RU)
273 |
274 | Other keys can be added. More details concerning the evaluation procedure are available here: [link](https://competitions.codalab.org/competitions/34022#learn_the_details-evaluation).
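275 | 
276 | As a concrete (and purely hypothetical) illustration, a definition modeling submission could be assembled as follows; the ids, glosses and file name below are invented:
277 | 
278 | ```python
279 | import json
280 | 
281 | # one generated gloss per test example, keyed by the original id
282 | predictions = [
283 |     {"id": "en.test.1", "gloss": "a small domesticated feline"},
284 |     {"id": "en.test.2", "gloss": "to move quickly on foot"},
285 | ]
286 | with open("en.test.defmod.json", "w") as ostr:
287 |     json.dump(predictions, ostr)
288 | ```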
275 |
276 |
277 | # License and citation
278 |
279 | The code and data of this shared task are provided under a CC-BY-SA license.
280 | If you use this repository in your research, please cite the task paper:
281 | ```
282 | @inproceedings{mickus-etal-2022-semeval,
283 | title = "{S}emeval-2022 Task 1: {CODWOE} {--} Comparing Dictionaries and Word Embeddings",
284 | author = "Mickus, Timothee and
285 | Van Deemter, Kees and
286 | Constant, Mathieu and
287 | Paperno, Denis",
288 | booktitle = "Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)",
289 | month = jul,
290 | year = "2022",
291 | address = "Seattle, United States",
292 | publisher = "Association for Computational Linguistics",
293 | url = "https://aclanthology.org/2022.semeval-1.1",
294 | pages = "1--14",
295 |     abstract = "Word embeddings have advanced the state of the art in NLP across numerous tasks. Understanding the contents of dense neural representations is of utmost interest to the computational semantics community. We propose to focus on relating these opaque word vectors with human-readable definitions, as found in dictionaries. This problem naturally divides into two subtasks: converting definitions into embeddings, and converting embeddings into definitions. This task was conducted in a multilingual setting, using comparable sets of embeddings trained homogeneously.",
296 | }
297 | ```
298 |
299 | Also consider looking at participants' submissions:
300 | - [1Cademy at Semeval-2022 Task 1: Investigating the Effectiveness of Multilingual, Multitask, and Language-Agnostic Tricks for the Reverse Dictionary Task](https://aclanthology.org/2022.semeval-1.2/)
301 | - [BLCU-ICALL at SemEval-2022 Task 1: Cross-Attention Multitasking Framework for Definition Modeling](https://aclanthology.org/2022.semeval-1.3/)
302 | - [LingJing at SemEval-2022 Task 1: Multi-task Self-supervised Pre-training for Multilingual Reverse Dictionary](https://aclanthology.org/2022.semeval-1.4/)
303 | - [IRB-NLP at SemEval-2022 Task 1: Exploring the Relationship Between Words and Their Semantic Representations](https://aclanthology.org/2022.semeval-1.5/)
304 | - [TLDR at SemEval-2022 Task 1: Using Transformers to Learn Dictionaries and Representations](https://aclanthology.org/2022.semeval-1.6/)
305 | - [MMG at SemEval-2022 Task 1: A Reverse Dictionary approach based on a review of the dataset from a lexicographic perspective](https://aclanthology.org/2022.semeval-1.7/)
306 | - [Edinburgh at SemEval-2022 Task 1: Jointly Fishing for Word Embeddings and Definitions](https://aclanthology.org/2022.semeval-1.8/)
307 | - [RIGA at SemEval-2022 Task 1: Scaling Recurrent Neural Networks for CODWOE Dictionary Modeling](https://aclanthology.org/2022.semeval-1.9/)
308 | - [Uppsala University at SemEval-2022 Task 1: Can Foreign Entries Enhance an English Reverse Dictionary?](https://aclanthology.org/2022.semeval-1.10/)
309 | - [BL.Research at SemEval-2022 Task 1: Deep networks for Reverse Dictionary using embeddings and LSTM autoencoders](https://aclanthology.org/2022.semeval-1.11/)
310 | - [JSI at SemEval-2022 Task 1: CODWOE - Reverse Dictionary: Monolingual and cross-lingual approaches](https://aclanthology.org/2022.semeval-1.12/)
311 |
--------------------------------------------------------------------------------
/rankings/submission_ranks/results_revdict-electra-rankings.csv:
--------------------------------------------------------------------------------
1 | user,EN MSE,EN cos,EN rank,FR MSE,FR cos,FR rank,RU MSE,RU cos,RU rank,Comments,Date,filename
2 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:09.048119+00:00,fr.test.revdict.zip
3 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:11.803362+00:00,fr.test.revdict.zip
4 | pzchen,,,,,,,,,,baseline-sgns-$LANG,2022-02-01 12:04:13.848370+00:00,baseline-out-sgns.zip
5 | pzchen,,,,,,,,,,ae-skip-share-sgns-$LANG,2022-02-01 12:04:16.166846+00:00,ae-skip-share-out-sgns.zip
6 | tthhanh,,,,,,,,,,,2022-02-01 12:04:19.286712+00:00,revdict-baseline-preds.zip
7 | pzchen,12.0,15.0,17.0,10.0,11.0,16.0,11.0,12.0,18.0,baseline-electra-$LANG,2022-02-01 12:04:21.767316+00:00,baseline-out-electra.zip
8 | Nihed_Bendahman_,,,,22.0,22.0,22.0,,,,,2022-02-01 12:04:23.848808+00:00,fr.test.revdict.zip
9 | pzchen,,,,,,,,,,baseline-char-$LANG,2022-02-01 12:04:26.228123+00:00,baseline-out-char.zip
10 | pzchen,,,,,,,,,,ae-skip-share-char-$LANG,2022-02-01 12:04:30.244614+00:00,ae-skip-share-out-char.zip
11 | pzchen,6.0,9.0,15.0,5.0,5.0,10.0,6.0,6.0,14.0,ae-skip-share-electra-$LANG,2022-02-01 12:04:33.077318+00:00,ae-skip-share-out-electra.zip
12 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:35.657573+00:00,en.test.revdict.zip
13 | Nihed_Bendahman_,,,,8.0,9.0,22.0,,,,,2022-02-01 12:04:37.804439+00:00,fr.test.revdict.zip
14 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:40.219400+00:00,en.test.revdict.zip
15 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:44.200059+00:00,es.test.revdict.zip
16 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:04:46.487460+00:00,ru.test.revdict.zip
17 | emukans,,,,,,,,,,,2022-02-01 12:04:48.831698+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-electra.json.zip
18 | Nihed_Bendahman_,9.0,11.0,23.0,,,,,,,,2022-02-01 12:04:50.938249+00:00,en.test.revdict.zip
19 | Nihed_Bendahman_,,,,,,,8.0,10.0,21.0,,2022-02-01 12:04:52.935214+00:00,ru.test.revdict.zip
20 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:05:25.788336+00:00,it.test.revdict.zip
21 | Locchi,,,,,,,,,,test,2022-02-01 12:05:28.772699+00:00,en.test.revdict.predicted.json.zip
22 | lukechan1231,,,,,,,,,,,2022-02-01 12:05:30.881540+00:00,en.char.test.defmod.json.zip
23 | emukans,,,,,,,,,,,2022-02-01 12:05:35.184513+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_no_early-en-sgns.json.zip
24 | pzchen,4.0,2.0,12.0,2.0,2.0,13.0,3.0,2.0,10.0,revdict-ensemble1-$EMBED-$LANG,2022-02-01 12:05:37.277011+00:00,ensemble1.zip
25 | emukans,,,,,,,,,,,2022-02-01 12:05:39.220654+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-electra.json.zip
26 | emukans,,,,,,,,,,,2022-02-01 12:05:41.571356+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-electra.json.zip
27 | emukans,,,,,,,,,,,2022-02-01 12:05:43.780984+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-sgns.json.zip
28 | emukans,,,,,,,,,,,2022-02-01 12:05:47.188910+00:00,defmod-lstm_ulbroka_tokenizer_2_layers_1024_hidden-en-char.json.zip
29 | emukans,,,,,,,,,,GRU CSLP 30 length SGNS,2022-02-01 12:05:49.582018+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_cslp-sgns.json.zip
30 | emukans,,,,,,,,,,GRU CS 30 length SGNS,2022-02-01 12:05:51.393164+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_cs-sgns.json.zip
31 | emukans,,,,,,,,,,GRU C 30 length SGNS,2022-02-01 12:05:53.548313+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_c-sgns.json.zip
32 | emukans,,,,,,,,,,GRU 2 layers 3072 50 length SGNS,2022-02-01 12:05:55.512222+00:00,defmod-gru_ulbroka_tokenizer_2_layers_3072_hidden_50_len-en-sgns.json.zip
33 | emukans,,,,,,,,,,GRU 2 layers 1024 30 length SGNS,2022-02-01 12:05:58.681696+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden-en-sgns.json.zip
34 | emukans,,,,,,,,,,GRU 2 layers 512 30 length SGNS,2022-02-01 12:06:01.098634+00:00,defmod-gru_ulbroka_tokenizer_2_layers_512_hidden-en-sgns.json.zip
35 | emukans,,,,,,,,,,GRU 2 layers 4096 30 length SGNS,2022-02-01 12:06:03.357951+00:00,defmod-gru_ulbroka_tokenizer_2_layers_4096_hidden-en-sgns.json.zip
36 | emukans,,,,,,,,,,GRU 4 layers 2048 30 length SGNS,2022-02-01 12:06:05.612675+00:00,defmod-gru_ulbroka_tokenizer_4_layers_2048_hidden-en-sgns.json.zip
37 | lukechan1231,,,,,,,,,,,2022-02-01 12:06:07.682648+00:00,en.char.test.defmod.json.zip
38 | lukechan1231,,,,,,,,,,,2022-02-01 12:06:12.716454+00:00,en.char.test.defmod.json.zip
39 | lukechan1231,,,,,,,,,,,2022-02-01 12:06:15.022199+00:00,en.char.test.defmod.json.zip
40 | emukans,,,,,,,,,,GRU 4 layers 3072 30 length SGNS,2022-02-01 12:06:16.911037+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden-en-sgns.json.zip
41 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS,2022-02-01 12:06:18.542822+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en-sgns.json.zip
42 | pzchen,,,,,,,,,,,2022-02-01 12:06:21.095892+00:00,ae-skip-share-out-sgns.zip
43 | pzchen,12.0,15.0,17.0,10.0,11.0,16.0,11.0,12.0,18.0,,2022-02-01 12:07:33.623337+00:00,baseline-out-electra.zip
44 | pzchen,,,,,,,,,,,2022-02-01 12:07:36.254210+00:00,baseline-out-char.zip
45 | pzchen,6.0,9.0,15.0,5.0,5.0,10.0,6.0,6.0,14.0,,2022-02-01 12:07:38.601592+00:00,ae-skip-share-out-electra.zip
46 | pzchen,2.0,3.0,13.0,3.0,3.0,14.0,4.0,3.0,11.0,revdict-ensemble2-$EMBED-$LANG,2022-02-01 12:07:40.475737+00:00,ensemble2.zip
47 | tthhanh,,,,,,,,,,,2022-02-01 12:07:43.919897+00:00,en_revdict_lstm.json.zip
48 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:46.434273+00:00,en.test.revdict.zip
49 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:49.300441+00:00,it.test.revdict.zip
50 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:51.717222+00:00,ru.test.revdict.zip
51 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:53.984159+00:00,fr.test.revdict.zip
52 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:07:58.188691+00:00,es.test.revdict.zip
53 | emukans,,,,,,,,,,Concat,2022-02-01 12:08:00.529661+00:00,defmod-concat.json.zip
54 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:03.084179+00:00,fr.test.revdict.zip
55 | pzchen,,,,,,,,,,,2022-02-01 12:08:05.796646+00:00,ae-skip-share-out-char.zip
56 | dkorenci,,,,,,,,,,,2022-02-01 12:08:08.242852+00:00,defmod-submitV1-sgns.zip
57 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:12.003987+00:00,fr.test.revdict.zip
58 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:14.268831+00:00,it.test.revdict.zip
59 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:16.292523+00:00,nouvelles_soumissions.zip
60 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:08:18.543585+00:00,char_vocab.zip
61 | dkorenci,,,,,,,,,,,2022-02-01 12:08:22.584864+00:00,defmod-submitV1-char.zip
62 | dkorenci,,,,,,,,,,,2022-02-01 12:08:24.686983+00:00,defmod-submitV1-electra.zip
63 | emukans,,,,,,,,,,GRU 2 layers 1024 30 length SGNS S,2022-02-01 12:08:26.769185+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_s-sgns.json.zip
64 | emukans,,,,,,,,,,GRU 2 layers 1024 30 length SGNS LP,2022-02-01 12:19:41.107170+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_lp-sgns.json.zip
65 | emukans,,,,,,,,,,GRU 2 layers 1024 30 length SGNS LPH,2022-02-01 12:19:44.238472+00:00,defmod-gru_ulbroka_tokenizer_2_layers_1024_hidden_30_len-en_lph-sgns.json.zip
66 | tthhanh,,,,,,,,,,,2022-02-01 12:19:47.815149+00:00,en_revdict_preds.bilstm.zip
67 | tthhanh,,,,,,,,,,,2022-02-01 12:19:50.341452+00:00,es_revdict_preds.bilstm.zip
68 | tthhanh,,,,,,,,,,,2022-02-01 12:19:52.441544+00:00,fr_revdict_preds.bilstm.zip
69 | tthhanh,,,,,,,,,,,2022-02-01 12:19:55.038641+00:00,it_revdict_preds.bilstm.zip
70 | tthhanh,,,,,,,,,,,2022-02-01 12:19:57.274701+00:00,ru_revdict_preds.bilstm.zip
71 | tthhanh,,,,,,,,,,,2022-02-01 12:19:59.946250+00:00,Archive.zip
72 | guntis,,,,,,,,,,ulbroka 4 layers 3072 epoch:005,2022-02-01 12:20:03.739221+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_no_early_epoch_005-en-sgns.zip
73 | guntis,,,,,,,,,,ulbroka 4 layers 3072 epoch:193,2022-02-01 12:20:06.186445+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_no_early_save04-en-sgns.zip
74 | guntis,,,,,,,,,,ulbroka 4 3072 electra epoch:55,2022-02-01 12:20:08.343455+00:00,defmod-gru_ulbroka_4_layers_3072_epoch_055-en-electra.zip
75 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:20:10.800772+00:00,nv_soumissions.zip
76 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:20:13.580339+00:00,nouv_soumissions.zip
77 | guntis,,,,,,,,,,ulbroka 4 3072 char epoch:56,2022-02-01 12:20:16.273107+00:00,defmod-gru_ulbroka_4_layers_3072_epoch_056-en-char.zip
78 | aardoiz,,,,,,,,,,,2022-02-01 12:20:19.295783+00:00,test_preds.zip
79 | guntis,,,,,,,,,,ulbroka 4 3072 electra epoch:005,2022-02-01 12:20:21.912198+00:00,defmod-gru_ulbroka_4_layers_3072-en-electra-005.zip
80 | pzchen,4.0,2.0,12.0,2.0,2.0,13.0,3.0,2.0,10.0,,2022-02-01 12:20:24.239699+00:00,ensemble1.zip
81 | pzchen,,,,,,,,,,,2022-02-01 12:20:26.428237+00:00,baseline-out-sgns.zip
82 | Nihed_Bendahman_,15.0,24.0,3.0,16.0,21.0,3.0,15.0,20.0,8.0,,2022-02-01 12:20:30.617090+00:00,soumissions_27-01-2022.zip
83 | chlrbgus321,,,,,,,,,,,2022-02-01 12:20:33.006741+00:00,revdict.zip
84 | emukans,,,,,,,,,,GRU 2 layers 500 tokenizer 1024 30 length SGNS LPH,2022-02-01 12:20:36.193554+00:00,defmod-gru_500_tokenizer_2_layers_1024_hidden_30_len-en_lp-sgns.json.zip
85 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS LPH 5 ep,2022-02-01 12:20:38.338631+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en_lp-sgns.json.zip
86 | pzchen,,,,,,,,,,ngrams,2022-02-01 12:20:40.511309+00:00,ngram.zip
87 | tthhanh,,,,,,,,,,,2022-02-01 12:49:02.633573+00:00,en_combined.json.zip
88 | tthhanh,,,,,,,,,,,2022-02-01 12:49:05.449378+00:00,Archive.zip
89 | Nihed_Bendahman_,7.0,12.0,23.0,6.0,7.0,22.0,9.0,8.0,21.0,,2022-02-01 12:49:07.558292+00:00,soumissions_28-01-2022.zip
90 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS LPH 15 ep,2022-02-01 12:49:10.444052+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en_lp-sgns.json 2.zip
91 | emukans,,,,,,,,,,GRU 4 layers 3072 30 length CHAR LPH 15 ep,2022-02-01 12:49:12.536420+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_30_len-en_lp-char.json.zip
92 | emukans,,,,,,,,,,GRU 4 layers 3072 30 length SGNS LPH 15 ep RU,2022-02-01 12:49:15.463661+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_30_len-ru_lp-sgns.json.zip
93 | emukans,,,,,,,,,,GRU 4 layers 3072 30 length ELECTRA LPH 15 ep,2022-02-01 12:49:19.448679+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_30_len-en_lp-electra.json.zip
94 | the0ne,,,,,,,,,,test submission: revdict sgns,2022-02-01 12:49:22.274702+00:00,revdict_sgns_results.zip
95 | Nihed_Bendahman_,15.0,24.0,3.0,16.0,21.0,3.0,15.0,20.0,8.0,,2022-02-01 12:49:24.564797+00:00,S1_test_revdict.zip
96 | Nihed_Bendahman_,,,,,,,,,,,2022-02-01 12:49:26.862551+00:00,S2_test_revdict.zip
97 | Nihed_Bendahman_,9.0,11.0,23.0,8.0,9.0,22.0,8.0,10.0,21.0,,2022-02-01 12:49:29.192438+00:00,S3_test_revdict.zip
98 | the0ne,,,,,,,,,,test submission: revdict char,2022-02-01 12:49:31.648469+00:00,revdict_char_results.zip
99 | the0ne,25.0,5.0,23.0,,,,,,,test submission: revdict electra,2022-02-01 12:49:33.989521+00:00,revdict_electra_results.zip
100 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:40.765121+00:00,en.sgns.test.defmod.json.zip
101 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:43.015657+00:00,es.char.test.defmod.json.zip
102 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:45.130238+00:00,fr.sgns.test.defmod.json.zip
103 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:47.544503+00:00,it.sgns.test.defmod.json.zip
104 | lukechan1231,,,,,,,,,,,2022-02-01 12:49:49.519259+00:00,ru.sgns.test.defmod.json.zip
105 | zhwa3087,,,,,,,,,,"2nd
106 | training: three languages.
107 | tricks: workpiece tokenizer trained on these three languages, and added language token when tokenizating the gloss.",2022-02-01 12:49:51.497139+00:00,predict1.json.zip
108 | aardoiz,,,,,,,,,,,2022-02-01 12:49:54.167640+00:00,test_preds_v2.zip
109 | tthhanh,,,,,,,,,,,2022-02-01 12:49:56.804493+00:00,Archive_en_cross.zip
110 | tthhanh,,,,,,,,,,,2022-02-01 12:49:58.780544+00:00,Archive_es_cross.zip
111 | tthhanh,,,,,,,,,,,2022-02-01 12:50:01.331861+00:00,Archive_fr_cross.zip
112 | the0ne,,,,,,,,,,multitask finetuning: sgns,2022-02-01 12:50:04.181990+00:00,revdict_sgns_results.zip
113 | tthhanh,,,,,,,,,,,2022-02-01 12:58:49.927108+00:00,Archive_fr_cross.zip
114 | the0ne,,,,,,,,,,multitask finetuning: char,2022-02-01 12:58:52.282929+00:00,revdict_char_results.zip
115 | the0ne,22.0,7.0,23.0,,,,,,,multitask finetuning: electra,2022-02-01 12:58:54.422590+00:00,revdict_electra_results.zip
116 | tthhanh,,,,,,,,,,,2022-02-01 12:58:56.632536+00:00,Archive_it_cross.zip
117 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS LPH 5 ep RU,2022-02-01 12:58:58.792452+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-ru_lp-sgns.json.zip
118 | tthhanh,,,,,,,,,,,2022-02-01 12:59:00.750771+00:00,Archive_ru_cross.zip
119 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length ELECTRA LPH 7 ep RU,2022-02-01 12:59:02.750990+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-ru_lp-electra.json.zip
120 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length ELECTRA LPH 7 ep EN,2022-02-01 12:59:04.934403+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en_lp-electra.json.zip
121 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length CHAR LPH 7 ep RU,2022-02-01 12:59:07.087390+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-ru_lp-char.json.zip
122 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length ELECTRA LPH 7 ep FR,2022-02-01 12:59:10.298601+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-fr_lp-electra.json.zip
123 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length SGNS LPH 6 ep IT,2022-02-01 12:59:12.655912+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-it_lp-sgns.json.zip
124 | emukans,,,,,,,,,,GRU 4 layers 3072 50 length CHAR LPH 6 ep EN,2022-02-01 12:59:14.921223+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-en_lp-char.json.zip
125 | Locchi,1.0,13.0,10.0,,,,,,,,2022-02-01 12:59:17.545753+00:00,en.test.revdict.predicted.zip
126 | Locchi,,,,,,,,,,,2022-02-01 12:59:21.423987+00:00,it.test.revdict.predicted.zip
127 | dkorenci,,,,,,,,,,,2022-02-01 12:59:24.548433+00:00,defmod-submitV2-allvec-8K.zip
128 | dkorenci,,,,,,,,,,,2022-02-01 12:59:26.793534+00:00,defmod-submitV2-sgns-8K.zip
129 | dkorenci,,,,,,,,,,,2022-02-01 12:59:29.383576+00:00,defmod-submitV2-allvec-5K.zip
130 | dkorenci,,,,,,,,,,,2022-02-01 12:59:31.531125+00:00,defmod-submitV2-sgns-5K.zip
131 | emukans,,,,,,,,,,,2022-02-01 12:59:33.615316+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_50_len-es_lp-sgns.json.zip
132 | WENGSYX,13.0,4.0,9.0,11.0,6.0,11.0,1.0,4.0,12.0,,2022-02-01 12:59:36.019237+00:00,en.test.defmod.zip
133 | Locchi,,,,,,,,,,,2022-02-01 12:59:40.379830+00:00,en.test.defmod.predicted.zip
134 | Locchi,,,,,,,,,,,2022-02-01 12:59:43.120882+00:00,en.test.defmod.predicted_v2.zip
135 | Locchi,,,,,,,,,,,2022-02-01 12:59:45.666414+00:00,it.test.defmod.predicted.zip
136 | Locchi,,,,,,,,,,,2022-02-01 12:59:48.464254+00:00,es.test.defmod.predicted.zip
137 | dkorenci,18.0,20.0,6.0,12.0,14.0,5.0,12.0,7.0,2.0,,2022-02-01 13:05:59.964077+00:00,revdict-v8k-b1024-avg.zip
138 | dkorenci,16.0,18.0,8.0,13.0,16.0,8.0,13.0,14.0,5.0,,2022-02-01 13:06:02.298756+00:00,revdict-v8k-b2048-avg.zip
139 | dkorenci,17.0,19.0,4.0,17.0,17.0,6.0,16.0,13.0,3.0,,2022-02-01 13:06:04.740677+00:00,revdict-v8k-b4096-avg.zip
140 | dkorenci,19.0,21.0,7.0,14.0,15.0,7.0,17.0,15.0,6.0,,2022-02-01 13:06:07.035962+00:00,revdict-v8k-b8192-avg.zip
141 | dkorenci,20.0,22.0,5.0,18.0,18.0,4.0,19.0,21.0,4.0,,2022-02-01 13:06:09.260075+00:00,revdict-v8k-b2048-avg-plat-e150.zip
142 | guntis,,,,,,,,,,electra 093,2022-02-01 13:06:11.475542+00:00,defmod-gru_ulbroka_4_layers_3072-en-electra-093.zip
143 | guntis,,,,,,,,,,sgns 315,2022-02-01 13:06:14.002128+00:00,defmod-gru_ulbroka_tokenizer_4_layers_3072_hidden_no_early-en-sgns-315.zip
144 | dkorenci,21.0,25.0,1.0,19.0,19.0,1.0,18.0,16.0,1.0,,2022-02-01 13:06:16.518200+00:00,revdict-v8k-b2048-mt7-avg-plat.zip
145 | guntis,,,,,,,,,,electra 085,2022-02-01 13:06:20.442528+00:00,defmod-gru_ulbroka_4_layers_3072-en-electra-085.zip
146 | WENGSYX,24.0,17.0,25.0,21.0,13.0,18.0,21.0,18.0,16.0,,2022-02-01 13:06:23.136127+00:00,results.zip
147 | the0ne,10.0,6.0,23.0,,,,,,,revdict contrastive,2022-02-01 13:06:25.470976+00:00,revdict_contrastive.zip
148 | dkorenci,,,,,,,,,,,2022-02-01 13:06:27.560421+00:00,defmod-submitV2-CF-allvec-sgns-8K.zip
149 | dkorenci,,,,,,,,,,,2022-02-01 13:06:29.686010+00:00,defmod-submitV2-CF-allvec-sgns-5K.zip
150 | talent404,,,,,,,,,,english defmod,2022-02-01 13:06:34.144091+00:00,test.json.zip
151 | zhwa3087,,,,,,,,,,"3rd
152 | training: five languages
153 | tricks: Unigram tokenizer trained on these five languages, language tokens, and dynamic weights averaging losses for sgns and char.",2022-02-01 13:06:36.534003+00:00,3rd.zip
154 | zhwa3087,,,,,,,,,,"final
155 | training: five languages
156 | tricks: Unigram tokenizer trained on these five languages, language tokens, and dynamic weights averaging losses for sgns and char.",2022-02-01 13:06:38.780846+00:00,final.zip
157 | guntis,,,,,,,,,,electra 101,2022-02-01 13:06:43.743485+00:00,defmod-gru_ulbroka_4_layers_3072-en-electra-101.zip
158 | dkorenci,,,,,,,,,,,2022-02-01 13:06:46.158191+00:00,defmod-submit-V3-allvec.zip
159 | dkorenci,,,,,,,,,,,2022-02-01 13:06:48.604489+00:00,defmod-submit-V3-sgns.zip
160 | dkorenci,,,,,,,,,,,2022-02-01 13:06:50.621731+00:00,defmod-submit-V4-allvec-gru.zip
161 | dkorenci,,,,,,,,,,,2022-02-01 13:06:56.750144+00:00,defmod-submit-V4-allvec-lstm.zip
162 | WENGSYX,24.0,17.0,25.0,21.0,13.0,18.0,21.0,18.0,16.0,,2022-02-01 21:42:34.383443+00:00,results.zip
163 | dkorenci,,,,,,,,,,,2022-02-01 10:31:20.217420+00:00,defmod-submitV3-CF-allvec-sgns.zip
164 | dkorenci,,,,,,,,,,,2022-02-01 11:40:22.727710+00:00,defmod-submitV4-CF-allvec-lstm-gru.zip
165 | cunliang.kong,,,,,,,,,,,2022-02-01 11:55:05.593202+00:00,results.zip
166 | cunliang.kong,,,,,,,,,,,2022-02-01 11:55:05.602946+00:00,results.zip
167 |
--------------------------------------------------------------------------------