├── code
├── source
│ ├── __init__.py
│ ├── evaluation
│ │ ├── __init__.py
│ │ └── ir_stats.py
│ ├── model
│ │ ├── sequenceTagger.py
│ │ ├── attentionLayer.py
│ │ ├── encoder.py
│ │ └── sentenceClassifier.py
│ ├── constants.py
│ ├── dataHandling
│ │ ├── sampling.py
│ │ └── sofc_exp_utils.py
│ ├── main_preprocess.py
│ └── utils.py
├── README.md
└── scripts
│ ├── run_experiment_sentence_classification.sh
│ ├── run_experiment_entity_typing.sh
│ ├── run_experiment_slot_filling.sh
│ └── run_cross_validation.sh
├── data
├── embeddings
│ └── readme.txt
└── models
│ └── SciBERT
│ └── readme.txt
├── sofc-exp-corpus
├── sofc_exp_guidelines.pdf
└── annotations
│ ├── frames
│ ├── PMC4986314.csv
│ ├── PMC4698742.csv
│ ├── PMC3793895.csv
│ ├── PMC5706185.csv
│ ├── PMC3730159.csv
│ ├── PMC4222441.csv
│ ├── PMC6370853.csv
│ ├── PMC6632008.csv
│ ├── PMC5944822.csv
│ ├── PMC4735809.csv
│ ├── PMC4663492.csv
│ ├── PMC6523084.csv
│ ├── PMC5700654.csv
│ ├── PMC6247067.csv
│ ├── PMC5456601.csv
│ ├── PMC6249295.csv
│ ├── PMC6427619.csv
│ ├── PMC6445146.csv
│ ├── PMC5456866.csv
│ ├── PMC5457052.csv
│ └── PMC6073263.csv
│ └── sentences
│ ├── PMC4698742.csv
│ ├── PMC4313086.csv
│ ├── PMC6073263.csv
│ ├── PMC6427619.csv
│ ├── PMC4578433.csv
│ ├── PMC4663492.csv
│ ├── PMC6164086.csv
│ ├── PMC6249295.csv
│ ├── PMC5457052.csv
│ ├── PMC5456866.csv
│ ├── PMC3564701.csv
│ ├── PMC6632008.csv
│ ├── PMC4772004.csv
│ ├── PMC5331335.csv
│ ├── PMC5700654.csv
│ ├── PMC4992832.csv
│ ├── PMC5793538.csv
│ ├── PMC4673446.csv
│ ├── PMC5456869.csv
│ ├── PMC6337513.csv
│ ├── PMC5216129.csv
│ ├── PMC4986314.csv
│ ├── PMC5457246.csv
│ ├── PMC5848893.csv
│ ├── PMC6461657.csv
│ ├── PMC6370853.csv
│ ├── PMC6247067.csv
│ ├── PMC5457058.csv
│ ├── PMC6517467.csv
│ ├── PMC5457196.csv
│ ├── PMC5944822.csv
│ ├── PMC6523084.csv
│ ├── PMC4222441.csv
│ └── PMC3793895.csv
├── sofcexp.yml
└── .gitignore
/code/source/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/code/source/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/code/README.md:
--------------------------------------------------------------------------------
1 | See README.md at top level of this project!
--------------------------------------------------------------------------------
/data/embeddings/readme.txt:
--------------------------------------------------------------------------------
1 | Place pre-trained embeddings here.
--------------------------------------------------------------------------------
/sofc-exp-corpus/sofc_exp_guidelines.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/boschresearch/sofc-exp_textmining_resources/HEAD/sofc-exp-corpus/sofc_exp_guidelines.pdf
--------------------------------------------------------------------------------
/data/models/SciBERT/readme.txt:
--------------------------------------------------------------------------------
1 | Place PyTorch SciBERT model files here using one folder per model, or change the value of the "pretrained_bert" command line argument to point to your model files.
--------------------------------------------------------------------------------
/code/scripts/run_experiment_sentence_classification.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs one sentence classification experiment.
# Usage: ./run_experiment_sentence_classification.sh [FOLD]
#   FOLD is forwarded to main.py as the value of -current_cross_val_fold.
# Relative paths assume that this script is run from within the scripts folder.

echo "Current fold if given: $1"

# Abort if the source folder cannot be entered; otherwise python3 would be
# started from the wrong working directory.
cd ../source || exit 1

# Activate your conda environment here ("sofcexp" is the name defined in sofcexp.yml).
source activate sofcexp

# For sentence classification BERT model: semantics of parameters see main.py
# ${1:+"$1"} expands to the quoted fold when one is given and to nothing otherwise,
# so the value is never word-split or glob-expanded.
python3 -u main.py -save_dir ../../models/bertLarge100epochs -subsampling 0.3 -batch_size 10 -task sentence \
-optim adamW -epochs 100 -lr 4e-7 -adam_epsilon 1e-8 -weight_decay 0 -num_cross_val_folds 5 \
-current_cross_val_fold ${1:+"$1"} -model_type "BERT" -use_cuda -embeddings bert
15 |
16 |
--------------------------------------------------------------------------------
/code/scripts/run_experiment_entity_typing.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs one entity extraction and typing experiment.
# Usage: ./run_experiment_entity_typing.sh [FOLD]
#   FOLD is forwarded to main.py as the value of -current_cross_val_fold.
# Relative paths assume that this script is run from within the scripts folder.

echo "Current fold if given: $1"

# Abort if the source folder cannot be entered; otherwise python3 would be
# started from the wrong working directory.
cd ../source || exit 1

source activate sofcexp # activate your conda environment here

# For entity extraction and typing: semantics of parameters see main.py
# ${1:+"$1"} expands to the quoted fold when one is given and to nothing otherwise,
# so the value is never word-split or glob-expanded.
python3 -u main.py -save_dir ../../models/scibertEntityTyping100epochs -subsampling 0.0 -batch_size 10 -task entity_typing \
-optim adamW -epochs 100 -lr 1e-5 -lr_bert 1e-5 -adam_epsilon 1e-8 -weight_decay 0 -num_cross_val_folds 5 \
-current_cross_val_fold ${1:+"$1"} -model_type "BERT" -use_cuda -embeddings bert
15 |
--------------------------------------------------------------------------------
/code/scripts/run_experiment_slot_filling.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs one slot filling experiment.
# Usage: ./run_experiment_slot_filling.sh [FOLD]
#   FOLD is forwarded to main.py as the value of -current_cross_val_fold.
# Relative paths assume that this script is run from within the scripts folder.

echo "Current fold if given: $1"

# Abort if the source folder cannot be entered; otherwise python3 would be
# started from the wrong working directory.
cd ../source || exit 1

# Activate your conda environment here ("sofcexp" is the name defined in sofcexp.yml).
source activate sofcexp

# For slot filling SciBERT model: semantics of parameters see main.py
# ${1:+"$1"} expands to the quoted fold when one is given and to nothing otherwise,
# so the value is never word-split or glob-expanded.
python3 -u main.py -save_dir ../../models/scibertSlotFilling100epochs -subsampling 0.0 -batch_size 10 -task slot_typing \
-optim adamW -epochs 100 -lr 1e-5 -lr_bert 1e-5 -adam_epsilon 1e-8 -weight_decay 0 -num_cross_val_folds 5 \
-current_cross_val_fold ${1:+"$1"} -model_type "BERT" -use_cuda -embeddings bert
15 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC4986314.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 EXPERIMENT:current_exp 1 823 831
2 | SPAN 2 EXPERIMENT:current_exp 2 24 33
3 | SPAN 3 MATERIAL 22 60 86
4 | SPAN 4 MATERIAL 22 88 91
5 | SPAN 5 EXPERIMENT:current_exp 22 97 101
6 | SPAN 6 MATERIAL 22 119 122
7 | SPAN 7 MATERIAL 125 8 11
8 | SPAN 8 EXPERIMENT:current_exp 125 65 75
9 | SPAN 9 VALUE 125 102 112
10 | SPAN 10 VALUE 125 117 127
11 | SPAN 11 MATERIAL 126 91 94
12 | SPAN 12 EXPERIMENT:current_exp 162 85 96
13 | SPAN 13 MATERIAL 162 145 148
14 | SPAN 14 MATERIAL 162 173 176
15 | EXPERIMENT 1 5
16 | electrolyte_material 3
17 | cathode_material 6
18 | EXPERIMENT 2 8
19 | cathode_material 7
20 | resistance 9
21 | resistance 10
22 | EXPERIMENT 3 12
23 | cathode_material 13
24 | electrolyte_material 14
25 | LINK coreference 3 4
26 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4698742.csv:
--------------------------------------------------------------------------------
1 | 1 1 0 727
2 | 2 0 728 1057
3 | 3 0 1058 1278
4 | 4 1 1279 1530
5 | 5 0 1531 1956
6 | 6 0 1957 2091
7 | 7 0 2092 2166
8 | 8 0 2167 2338
9 | 9 0 2339 2453
10 | 10 0 2454 2541
11 | 11 0 2542 2717
12 | 12 0 2718 2791
13 | 13 0 2792 2925
14 | 14 0 2926 3013
15 | 15 0 3014 3293
16 | 16 1 3294 3615
17 | 17 0 3616 3985
18 | 18 0 3986 4239
19 | 19 0 4240 4466
20 | 20 0 4467 4667
21 | 21 0 4668 5031
22 | 22 0 5032 5305
23 | 23 0 5306 5626
24 | 24 0 5627 5962
25 | 25 0 5963 6279
26 | 26 0 6280 6459
27 | 27 0 6460 6769
28 | 28 0 6770 6920
29 | 29 0 6921 7156
30 | 30 0 7157 7341
31 | 31 0 7342 7765
32 | 32 0 7766 7950
33 | 33 0 7951 8166
34 | 34 0 8167 8362
35 | 35 0 8363 8630
36 | 36 0 8631 8808
37 | 37 0 8809 8905
38 | 38 0 8906 9146
39 | 39 0 9147 9283
40 | 40 0 9284 9549
41 | 41 0 9550 9792
42 | 42 0 9793 9931
43 | 43 0 9932 10217
44 | 44 0 10218 10572
45 | 45 0 10573 10839
46 | 46 0 10840 11029
47 | 47 0 11030 11313
48 | 48 0 11314 11512
49 | 49 0 11513 11755
50 | 50 0 11756 11909
51 | 51 0 11910 12151
52 | 52 0 12152 12406
53 | 53 0 12407 12548
54 | 54 0 12549 12656
55 | 55 1 12657 12770
56 | 56 0 12771 12877
57 | 57 1 12878 13144
58 | 58 0 13145 13424
59 | 59 0 13425 13650
60 | 60 1 13651 13868
61 | 61 0 13869 13925
62 | 62 0 13926 13936
63 | 63 0 13937 14073
64 | 64 0 14074 14078
65 | 65 0 14079 14124
66 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4313086.csv:
--------------------------------------------------------------------------------
1 | 1 1 0 594
2 | 2 0 595 722
3 | 3 0 723 824
4 | 4 1 825 1001
5 | 5 0 1002 1091
6 | 6 1 1092 1309
7 | 7 1 1310 1513
8 | 8 0 1514 1609
9 | 9 0 1610 1926
10 | 10 0 1927 2217
11 | 11 0 2218 2432
12 | 12 0 2433 2504
13 | 13 0 2505 2639
14 | 14 0 2640 2794
15 | 15 0 2795 2991
16 | 16 0 2992 3121
17 | 17 1 3122 3216
18 | 18 1 3217 3439
19 | 19 0 3440 3565
20 | 20 0 3566 3570
21 | 21 0 3571 3818
22 | 22 0 3819 3837
23 | 23 0 3838 3999
24 | 24 0 4000 4154
25 | 25 1 4155 4304
26 | 26 1 4305 4459
27 | 27 1 4460 4770
28 | 28 1 4771 4923
29 | 29 1 4924 5195
30 | 30 0 5196 5387
31 | 31 0 5388 5447
32 | 32 0 5448 5710
33 | 33 1 5711 5869
34 | 34 0 5870 6009
35 | 35 0 6010 6186
36 | 36 0 6187 6457
37 | 37 1 6458 6571
38 | 38 1 6572 6708
39 | 39 1 6709 6810
40 | 40 0 6811 6942
41 | 41 0 6943 7103
42 | 42 1 7104 7288
43 | 43 0 7289 7534
44 | 44 0 7535 7667
45 | 45 1 7668 7827
46 | 46 0 7828 8002
47 | 47 0 8003 8231
48 | 48 0 8232 8403
49 | 49 0 8404 8649
50 | 50 0 8650 8861
51 | 51 0 8862 8967
52 | 52 0 8968 9104
53 | 53 0 9105 9109
54 | 54 0 9110 9189
55 | 55 0 9190 9562
56 | 56 0 9563 9607
57 | 57 0 9608 9863
58 | 58 0 9864 10081
59 | 59 0 10082 10153
60 | 60 0 10154 10266
61 | 61 0 10267 10381
62 | 62 0 10382 10386
63 | 63 0 10387 10434
64 | 64 0 10435 10523
65 | 65 0 10524 10611
66 | 66 0 10612 10737
67 | 67 0 10738 10976
68 | 68 0 10977 11065
69 | 69 0 11066 11198
70 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC4698742.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 DEVICE 1 487 491
2 | SPAN 2 MATERIAL 1 497 501
3 | SPAN 3 EXPERIMENT:current_exp 1 547 557
4 | SPAN 4 EXPERIMENT:current_exp 4 39 49
5 | SPAN 5 EXPERIMENT:current_exp 4 90 96
6 | SPAN 6 VALUE 4 111 121
7 | SPAN 7 VALUE 4 125 131
8 | SPAN 8 VALUE 4 137 142
9 | SPAN 9 MATERIAL 4 149 153
10 | SPAN 10 EXPERIMENT:current_exp 16 55 61
11 | SPAN 11 DEVICE 16 91 95
12 | SPAN 12 EXPERIMENT:current_exp 16 108 116
13 | SPAN 13 VALUE 55 3 9
14 | SPAN 14 VALUE 55 15 20
15 | SPAN 15 MATERIAL 55 27 30
16 | SPAN 16 EXPERIMENT:current_exp 55 70 73
17 | SPAN 17 VALUE 55 81 91
18 | SPAN 18 DEVICE 57 27 31
19 | SPAN 19 VALUE 57 37 42
20 | SPAN 20 MATERIAL 57 43 47
21 | SPAN 21 EXPERIMENT:future_work 57 54 61
22 | SPAN 22 VALUE 57 91 101
23 | SPAN 23 VALUE 57 105 111
24 | SPAN 24 EXPERIMENT:current_exp 60 39 49
25 | SPAN 25 VALUE 60 100 110
26 | SPAN 26 VALUE 60 114 120
27 | EXPERIMENT 1 3
28 | device 1
29 | electrolyte_material 2
30 | EXPERIMENT 2 5
31 | power_density 6
32 | working_temperature 7
33 | electrolyte_material 9
34 | EXPERIMENT 3 10
35 | device 11
36 | EXPERIMENT 4 16
37 | working_temperature 13
38 | electrolyte_material 15
39 | power_density 17
40 | EXPERIMENT 5 21
41 | device 18
42 | electrolyte_material 20
43 | power_density 22
44 | working_temperature 23
45 | EXPERIMENT 6 24
46 | power_density 25
47 | working_temperature 26
48 | LINK same_experiment 4 5
49 | LINK thickness 9 8
50 | LINK same_experiment 10 12
51 | LINK thickness 15 14
52 | LINK thickness 20 19
53 |
--------------------------------------------------------------------------------
/code/scripts/run_cross_validation.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs 5-fold cross validation for SOFC-Exp corpus related experiments.
# Paths assume that this script is run from within the scripts folder.

for f in {1..5}; do

    # Uncomment one of the run_experiment... lines depending on which experiments you want to do.
    # As written, the five folds run one after another. To start them in parallel, append " &" to the
    # chosen line and add "wait" after the loop. If your computing environment allows only one GPU
    # process at a time, keep them sequential, add a call to your scheduler, etc.

    # Variations of the models tested are configured in the respective run_experiment... file.

    # Experiment
    # ./run_experiment_sentence_classification.sh "$f"

    # Entity Types
    # ./run_experiment_entity_typing.sh "$f"

    # Slot filling
    # ./run_experiment_slot_filling.sh "$f"

    # No-op placeholder: bash rejects a loop body that contains only comments,
    # so this keeps the script valid until one of the lines above is uncommented.
    :
done

# The above processes all write their results to prediction files.
# Once they are done, collect results and compute performance statistics.
# Use the file source/evaluation/evaluate_cross_validation.py with appropriate command line arguments.

# Examples:
# sentence classification:
# python -u evaluation/evaluate_cross_validation.py -predictions_dir DIRECTORY_WITH_MODEL_PREDICTIONS -eval_mode "multiclass" -num_labels 2
# entity typing:
# python -u evaluation/evaluate_cross_validation.py -predictions_dir DIRECTORY_WITH_MODEL_PREDICTIONS -eval_mode "conll" -task "entity_types"
# slot typing:
# python -u evaluation/evaluate_cross_validation.py -predictions_dir DIRECTORY_WITH_MODEL_PREDICTIONS -eval_mode "conll" -task "slot_types"
35 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6073263.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 616
2 | 2 0 617 791
3 | 3 0 792 1115
4 | 4 0 1116 1195
5 | 5 1 1196 1391
6 | 6 1 1392 1507
7 | 7 1 1508 1708
8 | 8 1 1709 1984
9 | 9 1 1985 2094
10 | 10 0 2095 2263
11 | 11 1 2264 2360
12 | 12 0 2361 2451
13 | 13 0 2452 2615
14 | 14 1 2616 2796
15 | 15 1 2797 2989
16 | 16 1 2990 3134
17 | 17 0 3135 3353
18 | 18 0 3354 3471
19 | 19 0 3472 3630
20 | 20 0 3631 3791
21 | 21 0 3792 3835
22 | 22 0 3836 3932
23 | 23 0 3933 4025
24 | 24 0 4026 4183
25 | 25 0 4184 4265
26 | 26 0 4266 4564
27 | 27 0 4565 4684
28 | 28 0 4685 4725
29 | 29 0 4726 4961
30 | 30 0 4962 5131
31 | 31 0 5132 5273
32 | 32 1 5274 5498
33 | 33 0 5499 5558
34 | 34 0 5559 5663
35 | 35 0 5664 5802
36 | 36 0 5803 5963
37 | 37 0 5964 6172
38 | 38 0 6173 6249
39 | 39 0 6250 6368
40 | 40 0 6369 6516
41 | 41 0 6517 6643
42 | 42 0 6644 6749
43 | 43 0 6750 6897
44 | 44 0 6898 7032
45 | 45 0 7033 7178
46 | 46 0 7179 7373
47 | 47 0 7374 7480
48 | 48 0 7481 7630
49 | 49 0 7631 7754
50 | 50 0 7755 7874
51 | 51 0 7875 8017
52 | 52 0 8018 8130
53 | 53 1 8131 8353
54 | 54 0 8354 8517
55 | 55 0 8518 8771
56 | 56 0 8772 8913
57 | 57 0 8914 9122
58 | 58 0 9123 9260
59 | 59 0 9261 9319
60 | 60 0 9320 9437
61 | 61 0 9438 9501
62 | 62 0 9502 9641
63 | 63 0 9642 9840
64 | 64 0 9841 10007
65 | 65 0 10008 10171
66 | 66 0 10172 10253
67 | 67 0 10254 10355
68 | 68 0 10356 10532
69 | 69 0 10533 10609
70 | 70 1 10610 10781
71 | 71 0 10782 10905
72 | 72 0 10906 11005
73 | 73 1 11006 11105
74 | 74 0 11106 11166
75 | 75 0 11167 11280
76 | 76 1 11281 11403
77 | 77 1 11404 11548
78 | 78 1 11549 11653
79 | 79 1 11654 11883
80 | 80 0 11884 12043
81 | 81 0 12044 12218
82 | 82 0 12219 12370
83 | 83 0 12371 12573
84 | 84 0 12574 12675
85 | 85 0 12676 12814
86 | 86 1 12815 12919
87 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6427619.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 717
2 | 2 0 718 831
3 | 3 1 832 940
4 | 4 0 941 1098
5 | 5 1 1099 1299
6 | 6 0 1300 1460
7 | 7 0 1461 1626
8 | 8 0 1627 1714
9 | 9 0 1715 1894
10 | 10 0 1895 2008
11 | 11 0 2009 2170
12 | 12 0 2171 2293
13 | 13 0 2294 2373
14 | 14 0 2374 2439
15 | 15 0 2440 2574
16 | 16 0 2575 2693
17 | 17 0 2694 2809
18 | 18 0 2810 3026
19 | 19 1 3027 3197
20 | 20 1 3198 3323
21 | 21 0 3324 3488
22 | 22 0 3489 3574
23 | 23 0 3575 3678
24 | 24 0 3679 3789
25 | 25 0 3790 3938
26 | 26 0 3939 4150
27 | 27 0 4151 4283
28 | 28 0 4284 4459
29 | 29 0 4460 4555
30 | 30 0 4556 4623
31 | 31 0 4624 4731
32 | 32 0 4732 4814
33 | 33 0 4815 4941
34 | 34 0 4942 5024
35 | 35 0 5025 5100
36 | 36 0 5101 5194
37 | 37 0 5195 5412
38 | 38 0 5413 5497
39 | 39 0 5498 5687
40 | 40 0 5688 5963
41 | 41 0 5964 6152
42 | 42 0 6153 6315
43 | 43 0 6316 6418
44 | 44 0 6419 6654
45 | 45 0 6655 6807
46 | 46 1 6808 6864
47 | 47 0 6865 6989
48 | 48 0 6990 7176
49 | 49 0 7177 7293
50 | 50 0 7294 7439
51 | 51 0 7440 7537
52 | 52 0 7538 7646
53 | 53 0 7647 7766
54 | 54 0 7767 7895
55 | 55 0 7896 7990
56 | 56 0 7991 8179
57 | 57 0 8180 8391
58 | 58 0 8392 8478
59 | 59 0 8479 8637
60 | 60 0 8638 8703
61 | 61 0 8704 8882
62 | 62 0 8883 8947
63 | 63 0 8948 9026
64 | 64 0 9027 9146
65 | 65 0 9147 9283
66 | 66 0 9284 9428
67 | 67 1 9429 9633
68 | 68 0 9634 9836
69 | 69 0 9837 9957
70 | 70 0 9958 10107
71 | 71 0 10108 10215
72 | 72 0 10216 10357
73 | 73 0 10358 10508
74 | 74 0 10509 10700
75 | 75 1 10701 10868
76 | 76 0 10869 11059
77 | 77 0 11060 11257
78 | 78 0 11258 11331
79 | 79 1 11332 11483
80 | 80 0 11484 11665
81 | 81 0 11666 11768
82 | 82 1 11769 12120
83 | 83 1 12121 12195
84 | 84 1 12196 12505
85 | 85 0 12506 12606
86 | 86 0 12607 12687
87 | 87 0 12688 12784
88 | 88 0 12785 12892
89 | 89 1 12893 13073
90 | 90 1 13074 13209
91 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4578433.csv:
--------------------------------------------------------------------------------
1 | 1 1 0 1415
2 | 2 1 1416 2054
3 | 3 0 2055 2196
4 | 4 0 2197 2492
5 | 5 0 2493 2675
6 | 6 0 2676 2871
7 | 7 0 2872 3105
8 | 8 0 3106 3539
9 | 9 0 3540 3734
10 | 10 1 3735 3931
11 | 11 1 3932 4061
12 | 12 0 4062 4283
13 | 13 0 4284 4572
14 | 14 0 4573 5003
15 | 15 0 5004 5315
16 | 16 0 5316 5467
17 | 17 0 5468 5771
18 | 18 1 5772 6018
19 | 19 1 6019 6247
20 | 20 1 6248 6477
21 | 21 1 6478 6658
22 | 22 0 6659 6901
23 | 23 1 6902 7334
24 | 24 0 7335 7662
25 | 25 0 7663 7893
26 | 26 0 7894 8049
27 | 27 0 8050 8152
28 | 28 0 8153 8289
29 | 29 0 8290 8479
30 | 30 0 8480 8642
31 | 31 0 8643 8769
32 | 32 0 8770 9015
33 | 33 0 9016 9313
34 | 34 0 9314 9638
35 | 35 1 9639 9765
36 | 36 0 9766 10042
37 | 37 1 10043 10198
38 | 38 0 10199 10484
39 | 39 0 10485 10748
40 | 40 0 10749 10817
41 | 41 0 10818 11106
42 | 42 1 11107 11338
43 | 43 0 11339 11772
44 | 44 0 11773 12027
45 | 45 0 12028 12120
46 | 46 0 12121 12475
47 | 47 0 12476 12665
48 | 48 0 12666 12839
49 | 49 0 12840 12955
50 | 50 0 12956 13118
51 | 51 0 13119 13375
52 | 52 1 13376 13504
53 | 53 0 13505 13769
54 | 54 0 13770 14032
55 | 55 0 14033 14279
56 | 56 1 14280 14495
57 | 57 0 14496 14728
58 | 58 0 14729 14844
59 | 59 0 14845 14899
60 | 60 0 14900 14988
61 | 61 0 14989 15206
62 | 62 0 15207 15284
63 | 63 0 15285 15386
64 | 64 0 15387 15444
65 | 65 0 15445 15513
66 | 66 0 15514 15591
67 | 67 0 15592 15668
68 | 68 0 15669 15813
69 | 69 0 15814 15901
70 | 70 0 15902 16001
71 | 71 0 16002 16152
72 | 72 0 16153 16282
73 | 73 0 16283 16388
74 | 74 0 16389 16505
75 | 75 1 16506 16702
76 | 76 0 16703 17060
77 | 77 0 17061 17298
78 | 78 0 17299 17411
79 | 79 0 17412 17579
80 | 80 0 17580 17653
81 | 81 0 17654 17771
82 | 82 0 17772 17940
83 | 83 0 17941 18068
84 | 84 0 18069 18279
85 | 85 0 18280 18408
86 | 86 1 18409 18590
87 | 87 0 18591 18753
88 |
--------------------------------------------------------------------------------
/code/source/model/sequenceTagger.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Heike Adel
4 | @author: Annemarie Friedrich
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU Affero General Public License as published
8 | by the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU Affero General Public License for more details.
15 | You should have received a copy of the GNU Affero General Public License
16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | """
18 |
19 |
20 | import logging
21 |
22 | import torch
23 | from torch import nn
24 |
25 | from model.encoder import Encoder
26 |
27 |
28 | log = logging.getLogger("sequence_tagger")
29 |
30 |
class SequenceTagger(nn.Module):
    """Sequence tagging model: an ``Encoder`` followed by one linear layer.

    The linear layer maps encoder outputs of width ``options['hidden_size'] * 2``
    to ``options['num_labels']`` scores.
    """

    def __init__(self, weights_matrix, embedding_options, options, device):
        """Build the encoder and the output projection, then move the model to ``device``.

        Args:
            weights_matrix: forwarded unchanged to ``Encoder``.
            embedding_options: forwarded unchanged to ``Encoder``.
            options: mapping that must provide ``'hidden_size'`` and
                ``'num_labels'``; it is also forwarded to ``Encoder``.
            device: target device; forwarded to ``Encoder`` and used for ``self.to``.
        """
        super().__init__()

        print("Initializing sequence tagger ...")

        self.encoder = Encoder(weights_matrix, embedding_options, options, device)

        # The projection expects twice the configured hidden size as input
        # (presumably a bidirectional encoder -- TODO confirm in Encoder).
        projection_input_size = options['hidden_size'] * 2
        self.linear = nn.Linear(projection_input_size, options['num_labels'])

        self.to(device)

    def forward(self, tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths):
        """Encode the batch and return the label scores from the linear layer.

        All arguments are passed positionally, in the given order, to the encoder.

        Returns:
            The output of the linear layer applied to the encoder's first return value.

        Raises:
            AssertionError: if any returned score is NaN.
        """
        # The encoder returns a pair; only the first element is needed here.
        encoded, _ = self.encoder(
            tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths
        )

        scores = self.linear(encoded)

        # Fail fast on numerical problems instead of passing NaNs downstream.
        has_nan = torch.isnan(scores).any()
        assert not has_nan
        return scores
52 |
53 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC3793895.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 MATERIAL 12 295 319
2 | SPAN 2 DEVICE 13 286 314
3 | SPAN 3 EXPERIMENT:current_exp 13 342 349
4 | SPAN 4 MATERIAL 13 431 442
5 | SPAN 5 DEVICE 18 178 199
6 | SPAN 6 MATERIAL 59 140 173
7 | SPAN 7 MATERIAL 59 174 177
8 | SPAN 8 EXPERIMENT:current_exp 123 136 142
9 | SPAN 9 MATERIAL 123 148 149
10 | SPAN 10 EXPERIMENT:current_exp 123 150 152
11 | SPAN 11 MATERIAL 123 163 175
12 | SPAN 12 MATERIAL 123 186 198
13 | SPAN 13 VALUE 123 202 208
14 | SPAN 14 MATERIAL 131 103 137
15 | SPAN 15 DEVICE 154 12 17
16 | SPAN 16 EXPERIMENT:current_exp 154 23 31
17 | SPAN 17 DEVICE 154 117 121
18 | SPAN 18 MATERIAL 155 36 62
19 | SPAN 19 EXPERIMENT:general_info 155 70 72
20 | SPAN 20 DEVICE 155 95 99
21 | SPAN 21 EXPERIMENT:previous_work 155 113 121
22 | SPAN 22 VALUE 155 144 165
23 | SPAN 23 VALUE 155 169 175
24 | SPAN 24 MATERIAL 156 68 102
25 | SPAN 25 EXPERIMENT:current_exp 156 204 211
26 | SPAN 26 VALUE 156 222 232
27 | SPAN 27 MATERIAL 156 241 267
28 | SPAN 28 VALUE 156 278 284
29 | SPAN 29 MATERIAL 156 290 304
30 | SPAN 30 EXPERIMENT:current_exp 166 11 19
31 | SPAN 31 MATERIAL 166 24 48
32 | SPAN 32 VALUE 166 55 77
33 | SPAN 33 VALUE 166 81 88
34 | SPAN 34 EXPERIMENT:current_exp 167 18 26
35 | SPAN 35 MATERIAL 167 31 64
36 | EXPERIMENT 1 3
37 | device 2
38 | cathode_material 4
39 | EXPERIMENT 2 8
40 | cathode_material 9
41 | EXPERIMENT 3 10
42 | fuel_used 11
43 | fuel_used 12
44 | working_temperature 13
45 | EXPERIMENT 4 16
46 | device 15
47 | device 17
48 | EXPERIMENT 5 19
49 | cathode_material 18
50 | device 20
51 | EXPERIMENT 6 21
52 | resistance 22
53 | working_temperature 23
54 | EXPERIMENT 7 25
55 | resistance 26
56 | cathode_material 27
57 | working_temperature 28
58 | electrolyte_material 29
59 | EXPERIMENT 8 30
60 | cathode_material 31
61 | resistance 32
62 | working_temperature 33
63 | EXPERIMENT 9 34
64 | cathode_material 35
65 | LINK same_experiment 8 10
66 | LINK experiment_variation 25 21
67 | LINK experiment_variation 34 30
68 | LINK coreference 1 4
69 | LINK coreference 6 7
70 |
--------------------------------------------------------------------------------
/sofcexp.yml:
--------------------------------------------------------------------------------
1 | name: sofcexp
2 | channels:
3 | - pytorch
4 | - defaults
5 | dependencies:
6 | - blas=1.0=mkl
7 | - ca-certificates=2020.6.24=0
8 | - certifi=2020.6.20=py38_0
9 | - cudatoolkit=10.2.89=hfd86e86_1
10 | - freetype=2.10.2=h5ab3b9f_0
11 | - intel-openmp=2020.1=217
12 | - joblib=0.15.1=py_0
13 | - jpeg=9b=habf39ab_1
14 | - ld_impl_linux-64=2.33.1=h53a641e_7
15 | - libedit=3.1.20191231=h7b6447c_0
16 | - libffi=3.3=he6710b0_1
17 | - libgcc-ng=9.1.0=hdf63c60_0
18 | - libgfortran-ng=7.3.0=hdf63c60_0
19 | - libpng=1.6.37=hbc83047_0
20 | - libstdcxx-ng=9.1.0=hdf63c60_0
21 | - libtiff=4.1.0=h2733197_1
22 | - lz4-c=1.9.2=he6710b0_0
23 | - mkl=2019.4=243
24 | - mkl-service=2.3.0=py38he904b0f_0
25 | - mkl_fft=1.1.0=py38h23d657b_0
26 | - mkl_random=1.1.0=py38h962f231_0
27 | - ncurses=6.2=he6710b0_1
28 | - ninja=1.9.0=py38hfd86e86_0
29 | - numpy=1.18.5=py38ha1c710e_0
30 | - numpy-base=1.18.5=py38hde5b4d6_0
31 | - olefile=0.46=py_0
32 | - openssl=1.1.1g=h7b6447c_0
33 | - pillow=7.1.2=py38hb39fc2d_0
34 | - pip=20.1.1=py38_1
35 | - python=3.8.3=hcff3b4d_0
36 | - readline=8.0=h7b6447c_0
37 | - scikit-learn=0.23.1=py38h423224d_0
38 | - scipy=1.5.0=py38h0b6359f_0
39 | - setuptools=47.3.1=py38_0
40 | - six=1.15.0=py_0
41 | - sqlite=3.32.3=h62c20be_0
42 | - threadpoolctl=2.1.0=pyh5ca1d4c_0
43 | - tk=8.6.10=hbc83047_0
44 | - wheel=0.34.2=py38_0
45 | - xz=5.2.5=h7b6447c_0
46 | - zlib=1.2.11=h7b6447c_3
47 | - zstd=1.4.4=h0b5b093_3
48 | - pytorch=1.5.1=py3.8_cuda10.2.89_cudnn7.6.5_0
49 | - torchvision=0.6.1=py38_cu102
50 | - pip:
51 | - chardet==3.0.4
52 | - click==7.1.2
53 | - filelock==3.0.12
54 | - idna==2.10
55 | - packaging==20.4
56 | - pyparsing==2.4.7
57 | - regex==2020.6.8
58 | - requests==2.24.0
59 | - sacremoses==0.0.43
60 | - sentencepiece==0.1.92
61 | - tokenizers==0.8.0rc4
62 | - torch==1.5.1
63 | - tqdm==4.47.0
64 | - transformers==3.0.0
65 | - urllib3==1.25.9
66 | prefix: INSERT-YOUR-CONDA-PATH/envs/sofcexp
67 |
68 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC5706185.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 EXPERIMENT:general_info 376 39 46
2 | SPAN 2 DEVICE 376 68 115
3 | SPAN 3 DEVICE 376 117 122
4 | SPAN 4 EXPERIMENT:previous_work 379 23 34
5 | SPAN 5 MATERIAL 379 35 64
6 | SPAN 6 MATERIAL 379 66 71
7 | SPAN 7 EXPERIMENT:previous_work 379 107 115
8 | SPAN 8 DEVICE 379 147 152
9 | SPAN 9 MATERIAL 380 4 9
10 | SPAN 10 EXPERIMENT:previous_work 380 18 29
11 | SPAN 11 MATERIAL 380 33 47
12 | SPAN 12 MATERIAL 380 49 52
13 | SPAN 13 MATERIAL 381 22 31
14 | SPAN 14 EXPERIMENT:previous_work 381 41 44
15 | SPAN 15 VALUE 381 120 131
16 | SPAN 16 VALUE 381 136 141
17 | SPAN 17 VALUE 381 145 151
18 | SPAN 18 EXPERIMENT:previous_work 382 15 20
19 | SPAN 19 MATERIAL 382 30 39
20 | SPAN 20 EXPERIMENT:previous_work 382 58 61
21 | SPAN 21 DEVICE 382 110 140
22 | SPAN 22 MATERIAL 383 29 40
23 | SPAN 23 MATERIAL 383 42 46
24 | SPAN 24 MATERIAL 385 53 57
25 | SPAN 25 EXPERIMENT:previous_work 386 33 38
26 | SPAN 26 DEVICE 386 75 104
27 | SPAN 27 MATERIAL 387 100 133
28 | SPAN 28 MATERIAL 387 135 140
29 | SPAN 29 DEVICE 387 160 195
30 | SPAN 30 EXPERIMENT:previous_work 387 206 218
31 | SPAN 31 EXPERIMENT:previous_work 388 130 136
32 | SPAN 32 VALUE 388 159 171
33 | EXPERIMENT 1 1
34 | device 2
35 | EXPERIMENT 2 4
36 | cathode_material 5
37 | EXPERIMENT 3 7
38 | device 8
39 | EXPERIMENT 4 10
40 | cathode_material 9
41 | cathode_material 11
42 | EXPERIMENT 5 14
43 | cathode_material 13
44 | current_density 15
45 | time_of_operation 16
46 | working_temperature 17
47 | EXPERIMENT 6 18
48 | cathode_material 19
49 | EXPERIMENT 7 20
50 | device 21
51 | EXPERIMENT 8 25
52 | cathode_material 24
53 | device 26
54 | EXPERIMENT 9 30
55 | electrolyte_material 27
56 | device 29
57 | EXPERIMENT 10 31
58 | open_circuit_voltage 32
59 | LINK same_experiment 4 7
60 | LINK same_experiment 7 10
61 | LINK same_experiment 10 14
62 | LINK same_experiment 14 18
63 | LINK same_experiment 18 20
64 | LINK same_experiment 30 31
65 | LINK coreference 2 3
66 | LINK coreference 5 6
67 | LINK coreference 11 12
68 | LINK coreference 22 23
69 | LINK coreference 27 28
70 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC3730159.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 DEVICE 162 29 34
2 | SPAN 2 MATERIAL 162 44 46
3 | SPAN 3 MATERIAL 162 56 59
4 | SPAN 4 MATERIAL 162 60 62
5 | SPAN 5 MATERIAL 162 75 77
6 | SPAN 6 EXPERIMENT:previous_work 162 97 105
7 | SPAN 7 EXPERIMENT:previous_work 162 110 122
8 | SPAN 8 DEVICE 172 4 8
9 | SPAN 9 MATERIAL 172 22 30
10 | SPAN 10 MATERIAL 172 33 36
11 | SPAN 11 MATERIAL 172 38 40
12 | SPAN 12 EXPERIMENT:previous_work 172 65 77
13 | SPAN 13 EXPERIMENT:previous_work 173 75 83
14 | SPAN 14 MATERIAL 173 110 118
15 | SPAN 15 MATERIAL 173 121 124
16 | SPAN 16 MATERIAL 173 126 128
17 | SPAN 17 EXPERIMENT:previous_work 174 11 17
18 | SPAN 18 MATERIAL 174 27 30
19 | SPAN 19 VALUE 174 69 79
20 | SPAN 20 MATERIAL 174 111 116
21 | SPAN 21 DEVICE 174 129 133
22 | SPAN 22 EXPERIMENT:previous_work 174 134 144
23 | SPAN 23 MATERIAL 174 151 154
24 | SPAN 24 VALUE 174 181 191
25 | SPAN 25 MATERIAL 174 198 203
26 | SPAN 26 DEVICE 175 4 8
27 | SPAN 27 EXPERIMENT:previous_work 175 16 24
28 | SPAN 28 MATERIAL 175 28 43
29 | SPAN 29 MATERIAL 175 67 99
30 | SPAN 30 EXPERIMENT:previous_work 175 125 137
31 | SPAN 31 DEVICE 176 4 8
32 | SPAN 32 MATERIAL 176 59 74
33 | SPAN 33 MATERIAL 176 76 90
34 | SPAN 34 MATERIAL 176 100 105
35 | SPAN 35 DEVICE 177 58 62
36 | SPAN 36 EXPERIMENT:previous_work 177 74 83
37 | SPAN 37 VALUE 177 89 94
38 | EXPERIMENT 1 6
39 | device 1
40 | cathode_material 2
41 | electrolyte_material 3
42 | anode_material 4
43 | fuel_used 5
44 | EXPERIMENT 2 12
45 | device 8
46 | cathode_material 9
47 | cathode_material 10
48 | cathode_material 11
49 | EXPERIMENT 3 13
50 | cathode_material 14
51 | cathode_material 15
52 | cathode_material 16
53 | EXPERIMENT 4 17
54 | fuel_used 18
55 | current_density 19
56 | fuel_used 20
57 | EXPERIMENT 5 22
58 | device 21
59 | fuel_used 23
60 | current_density 24
61 | fuel_used 25
62 | EXPERIMENT 6 27
63 | device 26
64 | electrolyte_material 28
65 | cathode_material 29
66 | EXPERIMENT 7 36
67 | device 35
68 | voltage 37
69 | LINK same_experiment 6 7
70 | LINK same_experiment 12 13
71 | LINK experiment_variation 22 17
72 | LINK same_experiment 13 17
73 | LINK same_experiment 27 30
74 | LINK same_experiment 30 36
75 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4663492.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 672
2 | 2 0 673 782
3 | 3 0 783 953
4 | 4 1 954 1168
5 | 5 0 1169 1309
6 | 6 0 1310 1466
7 | 7 1 1467 1826
8 | 8 1 1827 1989
9 | 9 0 1990 2100
10 | 10 0 2101 2296
11 | 11 0 2297 2487
12 | 12 0 2488 2551
13 | 13 0 2552 2680
14 | 14 0 2681 2805
15 | 15 0 2806 2884
16 | 16 0 2885 3002
17 | 17 0 3003 3143
18 | 18 0 3144 3358
19 | 19 0 3359 3597
20 | 20 0 3598 3726
21 | 21 1 3727 3865
22 | 22 0 3866 4002
23 | 23 0 4003 4182
24 | 24 0 4183 4343
25 | 25 0 4344 4528
26 | 26 0 4529 4661
27 | 27 0 4662 4794
28 | 28 0 4795 4943
29 | 29 0 4944 5142
30 | 30 0 5143 5242
31 | 31 0 5243 5430
32 | 32 0 5431 5659
33 | 33 0 5660 5814
34 | 34 0 5815 6086
35 | 35 0 6087 6285
36 | 36 0 6286 6562
37 | 37 0 6563 6639
38 | 38 0 6640 6848
39 | 39 0 6849 6958
40 | 40 0 6959 7219
41 | 41 1 7220 7432
42 | 42 0 7433 7513
43 | 43 1 7514 7720
44 | 44 0 7721 7804
45 | 45 1 7805 7919
46 | 46 1 7920 8195
47 | 47 1 8196 8395
48 | 48 0 8396 8549
49 | 49 0 8550 8746
50 | 50 0 8747 8965
51 | 51 0 8966 9089
52 | 52 0 9090 9350
53 | 53 0 9351 9639
54 | 54 0 9640 9758
55 | 55 0 9759 9920
56 | 56 0 9921 10011
57 | 57 0 10012 10086
58 | 58 0 10087 10224
59 | 59 0 10225 10532
60 | 60 0 10533 10737
61 | 61 0 10738 10877
62 | 62 0 10878 11081
63 | 63 0 11082 11288
64 | 64 0 11289 11412
65 | 65 0 11413 11587
66 | 66 0 11588 11765
67 | 67 0 11766 11968
68 | 68 0 11969 12170
69 | 69 0 12171 12347
70 | 70 0 12348 12509
71 | 71 0 12510 12659
72 | 72 0 12660 12861
73 | 73 0 12862 12981
74 | 74 0 12982 13322
75 | 75 0 13323 13499
76 | 76 0 13500 13604
77 | 77 0 13605 13830
78 | 78 0 13831 13999
79 | 79 0 14000 14164
80 | 80 0 14165 14302
81 | 81 0 14303 14422
82 | 82 0 14423 14599
83 | 83 0 14600 14767
84 | 84 0 14768 14935
85 | 85 1 14936 15057
86 | 86 0 15058 15197
87 | 87 0 15198 15303
88 | 88 0 15304 15404
89 | 89 0 15405 15488
90 | 90 0 15489 15603
91 | 91 0 15604 15702
92 | 92 0 15703 15777
93 | 93 0 15778 15834
94 | 94 0 15835 15893
95 | 95 0 15894 16050
96 | 96 0 16051 16216
97 | 97 0 16217 16334
98 | 98 0 16335 16520
99 | 99 0 16521 16704
100 | 100 0 16705 16814
101 | 101 0 16815 16883
102 | 102 0 16884 17000
103 | 103 0 17001 17005
104 | 104 0 17006 17051
105 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC4222441.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 MATERIAL 1 652 657
2 | SPAN 2 EXPERIMENT:current_exp 1 695 702
3 | SPAN 3 DEVICE 1 716 738
4 | SPAN 4 DEVICE 2 4 9
5 | SPAN 5 EXPERIMENT:current_exp 2 20 28
6 | SPAN 6 MATERIAL 2 32 40
7 | SPAN 7 DEVICE 8 30 52
8 | SPAN 8 DEVICE 8 54 59
9 | SPAN 9 DEVICE 9 0 4
10 | SPAN 10 EXPERIMENT:general_info 9 5 8
11 | SPAN 11 MATERIAL 9 21 29
12 | SPAN 12 MATERIAL 9 60 71
13 | SPAN 13 DEVICE 20 56 61
14 | SPAN 14 EXPERIMENT:future_work 20 73 80
15 | SPAN 15 MATERIAL 20 114 125
16 | SPAN 16 MATERIAL 20 141 147
17 | SPAN 17 MATERIAL 20 152 164
18 | SPAN 18 EXPERIMENT:general_info 21 21 26
19 | SPAN 19 MATERIAL 21 46 51
20 | SPAN 20 MATERIAL 21 86 112
21 | SPAN 21 MATERIAL 21 114 117
22 | SPAN 22 EXPERIMENT:current_exp 37 7 12
23 | SPAN 23 DEVICE 37 55 59
24 | SPAN 24 MATERIAL 37 107 109
25 | SPAN 25 VALUE 37 121 132
26 | SPAN 26 VALUE 37 172 177
27 | SPAN 27 EXPERIMENT:current_exp 39 18 26
28 | SPAN 28 DEVICE 39 35 40
29 | SPAN 29 VALUE 39 71 88
30 | SPAN 30 VALUE 44 23 34
31 | SPAN 31 VALUE 45 393 420
32 | SPAN 32 DEVICE 52 63 67
33 | SPAN 33 EXPERIMENT:current_exp 52 73 82
34 | SPAN 34 VALUE 52 129 140
35 | SPAN 35 VALUE 52 142 150
36 | SPAN 36 VALUE 52 197 212
37 | SPAN 37 VALUE 52 214 223
38 | SPAN 38 EXPERIMENT:previous_work 165 36 44
39 | SPAN 39 MATERIAL 165 45 47
40 | SPAN 40 MATERIAL 165 63 65
41 | SPAN 41 MATERIAL 165 66 69
42 | SPAN 42 MATERIAL 165 74 76
43 | SPAN 43 MATERIAL 165 77 80
44 | EXPERIMENT 1 2
45 | anode_material 1
46 | device 3
47 | EXPERIMENT 2 5
48 | device 4
49 | fuel_used 6
50 | EXPERIMENT 3 10
51 | device 9
52 | fuel_used 11
53 | fuel_used 12
54 | EXPERIMENT 4 14
55 | device 13
56 | fuel_used 15
57 | fuel_used 16
58 | fuel_used 17
59 | EXPERIMENT 5 18
60 | anode_material 19
61 | anode_material 20
62 | EXPERIMENT 6 22
63 | device 23
64 | fuel_used 24
65 | current_density 25
66 | time_of_operation 26
67 | EXPERIMENT 7 27
68 | device 28
69 | current_density 29
70 | EXPERIMENT 8 33
71 | device 32
72 | current_density 34
73 | time_of_operation 35
74 | current_density 36
75 | time_of_operation 37
76 | EXPERIMENT 9 38
77 | anode_material 39
78 | anode_material 40
79 | anode_material 41
80 | anode_material 42
81 | anode_material 43
82 | LINK same_experiment 2 5
83 | LINK coreference 7 8
84 | LINK coreference 7 9
85 | LINK coreference 20 21
86 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6164086.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 774
2 | 2 1 775 1081
3 | 3 1 1082 1260
4 | 4 0 1261 1498
5 | 5 1 1499 1926
6 | 6 0 1927 2020
7 | 7 0 2021 2166
8 | 8 0 2167 2258
9 | 9 0 2259 2329
10 | 10 0 2330 2460
11 | 11 0 2461 2722
12 | 12 0 2723 2962
13 | 13 0 2963 3187
14 | 14 0 3188 3321
15 | 15 0 3322 3428
16 | 16 0 3429 3558
17 | 17 0 3559 3680
18 | 18 0 3681 3888
19 | 19 0 3889 4078
20 | 20 0 4079 4254
21 | 21 0 4255 4459
22 | 22 0 4460 4538
23 | 23 1 4539 4706
24 | 24 1 4707 4934
25 | 25 1 4935 5186
26 | 26 0 5187 5398
27 | 27 0 5399 5448
28 | 28 0 5449 5626
29 | 29 0 5627 5685
30 | 30 0 5686 5802
31 | 31 1 5803 6084
32 | 32 0 6085 6235
33 | 33 0 6236 6253
34 | 34 0 6254 6413
35 | 35 0 6414 6706
36 | 36 0 6707 6842
37 | 37 0 6843 6895
38 | 38 0 6896 7143
39 | 39 0 7144 7247
40 | 40 0 7248 7370
41 | 41 0 7371 7533
42 | 42 0 7534 7827
43 | 43 0 7828 8038
44 | 44 0 8039 8116
45 | 45 0 8117 8256
46 | 46 0 8257 8383
47 | 47 0 8384 8463
48 | 48 0 8464 8643
49 | 49 0 8644 8836
50 | 50 0 8837 9032
51 | 51 0 9033 9175
52 | 52 0 9176 9299
53 | 53 0 9300 9484
54 | 54 0 9485 9565
55 | 55 0 9566 9716
56 | 56 0 9717 9949
57 | 57 0 9950 10095
58 | 58 0 10096 10157
59 | 59 0 10158 10261
60 | 60 0 10262 10438
61 | 61 0 10439 10582
62 | 62 0 10583 10686
63 | 63 0 10687 10792
64 | 64 0 10793 10897
65 | 65 1 10898 11016
66 | 66 1 11017 11119
67 | 67 0 11120 11209
68 | 68 0 11210 11400
69 | 69 0 11401 11530
70 | 70 0 11531 11594
71 | 71 0 11595 11672
72 | 72 1 11673 11856
73 | 73 1 11857 11894
74 | 74 0 11895 12093
75 | 75 0 12094 12225
76 | 76 0 12226 12368
77 | 77 0 12369 12510
78 | 78 0 12511 12580
79 | 79 0 12581 12660
80 | 80 0 12661 12918
81 | 81 0 12919 13010
82 | 82 0 13011 13252
83 | 83 0 13253 13312
84 | 84 0 13313 13430
85 | 85 0 13431 13695
86 | 86 0 13696 13817
87 | 87 0 13818 14155
88 | 88 0 14156 14312
89 | 89 0 14313 14495
90 | 90 0 14496 14648
91 | 91 0 14649 14775
92 | 92 0 14776 14851
93 | 93 0 14852 14966
94 | 94 0 14967 15096
95 | 95 0 15097 15179
96 | 96 0 15180 15350
97 | 97 0 15351 15522
98 | 98 0 15523 15583
99 | 99 0 15584 15759
100 | 100 0 15760 16000
101 | 101 0 16001 16101
102 | 102 1 16102 16290
103 | 103 0 16291 16531
104 | 104 1 16532 16710
105 | 105 0 16711 16808
106 | 106 1 16809 16932
107 | 107 1 16933 17097
108 | 108 0 17098 17260
109 | 109 0 17261 17369
110 | 110 0 17370 17525
111 | 111 0 17526 17647
112 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/code/source/constants.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Heike Adel
4 | @author: Annemarie Friedrich
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU Affero General Public License as published
8 | by the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU Affero General Public License for more details.
15 | You should have received a copy of the GNU Affero General Public License
16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | """
18 |
19 |
# Reserved vocabulary indices and the surface tokens that stand for them.
# The padding and out-of-vocabulary tokens must be distinct, non-empty strings,
# otherwise they collapse into a single key when used in one vocabulary mapping.
PAD_IDX = 0
OOV_IDX = 1
PAD_TOK = "<PAD>"
OOV_TOK = "<OOV>"

# Distinct sentinel tags marking the boundaries of a tag sequence
# (presumably consumed by the sequence tagger -- verify against its usage).
START_TAG = "<START>"
STOP_TAG = "<STOP>"

# for coarse-grained concepts
entity2index = {'O': 0, 'B-DEVICE': 1, 'B-EXPERIMENT': 2, 'B-MATERIAL': 3, 'B-VALUE': 4, 'I-DEVICE': 5, 'I-EXPERIMENT': 6, 'I-MATERIAL': 7, 'I-VALUE': 8}
index2entity = {index: label for label, index in entity2index.items()}
index2entity[-1] = "-"  # for nicer debugging output

# for fine-grained experiment slots
# NOTE: "none", "SAME_EXPERIMENT" and the interconnect_material labels are
# deliberately mapped onto index 0, i.e. they are treated like 'O'.
slot2index = {'O': 0, "B-conductivity": 1, "I-conductivity": 2, "B-current_density": 3, "I-current_density": 4, "B-degradation_rate": 5, "I-degradation_rate": 6,
              "B-device": 7, "I-device": 8, "B-experiment_evoking_word": 9, "I-experiment_evoking_word": 10, "B-fuel_used": 11, "I-fuel_used": 12, "B-open_circuit_voltage": 13,
              "I-open_circuit_voltage": 14, "B-power_density": 15, "I-power_density": 16, "B-resistance": 17, "I-resistance": 18, "B-thickness": 19, "I-thickness": 20, "B-time_of_operation": 21,
              "I-time_of_operation": 22, "B-voltage": 23, "I-voltage": 24, "B-working_temperature": 25, "I-working_temperature": 26, "B-anode_material": 27, "B-cathode_material": 28,
              "B-electrolyte_material": 29, "B-interlayer_material": 30, "I-anode_material": 31, "I-cathode_material": 32, "I-electrolyte_material": 33,
              "I-interlayer_material": 34, "B-support_material": 35, "I-support_material": 36, "none": 0, "SAME_EXPERIMENT": 0, "B-interconnect_material": 0, "I-interconnect_material": 0}
index2slot = {index: label for label, index in slot2index.items()}
# Several labels share index 0, so the plain inversion keeps whichever of them
# comes last in slot2index; pin index 0 explicitly to the outside label.
index2slot[0] = "O"

RANDOM_SEED = 300
44 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC6370853.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 MATERIAL 23 35 53
2 | SPAN 2 MATERIAL 23 55 60
3 | SPAN 3 DEVICE 104 145 155
4 | SPAN 4 MATERIAL 104 165 175
5 | SPAN 5 EXPERIMENT:current_exp 104 193 202
6 | SPAN 6 EXPERIMENT:current_exp 104 207 215
7 | SPAN 7 MATERIAL 104 231 234
8 | SPAN 8 DEVICE 105 4 14
9 | SPAN 9 MATERIAL 105 39 49
10 | SPAN 10 MATERIAL 105 52 55
11 | SPAN 11 MATERIAL 105 58 62
12 | SPAN 12 MATERIAL 105 65 74
13 | SPAN 13 EXPERIMENT:current_exp 105 80 86
14 | SPAN 14 MATERIAL 105 101 103
15 | SPAN 15 MATERIAL 105 142 145
16 | SPAN 16 EXPERIMENT:current_exp 106 28 32
17 | SPAN 17 VALUE 106 33 38
18 | SPAN 18 VALUE 106 40 45
19 | SPAN 19 VALUE 106 47 52
20 | SPAN 20 VALUE 106 58 70
21 | SPAN 21 MATERIAL 106 75 80
22 | SPAN 22 MATERIAL 106 82 92
23 | SPAN 23 MATERIAL 106 94 104
24 | SPAN 24 MATERIAL 106 110 121
25 | SPAN 25 VALUE 106 140 146
26 | SPAN 26 MATERIAL 106 161 163
27 | SPAN 27 MATERIAL 111 36 49
28 | SPAN 28 MATERIAL 111 54 67
29 | SPAN 29 EXPERIMENT:current_exp 111 73 83
30 | SPAN 30 VALUE 111 90 95
31 | SPAN 31 VALUE 111 100 112
32 | SPAN 32 MATERIAL 116 30 40
33 | SPAN 33 MATERIAL 116 42 52
34 | SPAN 34 MATERIAL 116 58 69
35 | SPAN 35 EXPERIMENT:current_exp 116 70 74
36 | SPAN 36 VALUE 116 75 80
37 | SPAN 37 VALUE 116 82 87
38 | SPAN 38 VALUE 116 93 104
39 | SPAN 39 VALUE 116 123 129
40 | SPAN 40 MATERIAL 116 133 135
41 | SPAN 41 MATERIAL 130 32 53
42 | SPAN 42 DEVICE 130 54 58
43 | SPAN 43 MATERIAL 130 66 77
44 | SPAN 44 EXPERIMENT:current_exp 130 84 91
45 | SPAN 45 VALUE 130 92 104
46 | SPAN 46 MATERIAL 130 119 121
47 | SPAN 47 VALUE 130 125 131
48 | EXPERIMENT 1 5
49 | device 3
50 | anode_material 4
51 | EXPERIMENT 2 6
52 | anode_material 7
53 | EXPERIMENT 3 13
54 | device 8
55 | anode_material 9
56 | interlayer_material 10
57 | electrolyte_material 11
58 | fuel_used 14
59 | fuel_used 15
60 | EXPERIMENT 4 16
61 | power_density 17
62 | power_density 18
63 | power_density 19
64 | power_density 20
65 | anode_material 21
66 | anode_material 22
67 | anode_material 23
68 | anode_material 24
69 | working_temperature 25
70 | fuel_used 26
71 | EXPERIMENT 5 29
72 | anode_material 27
73 | anode_material 28
74 | power_density 30
75 | power_density 31
76 | EXPERIMENT 6 35
77 | anode_material 32
78 | anode_material 33
79 | anode_material 34
80 | resistance 36
81 | resistance 37
82 | resistance 38
83 | working_temperature 39
84 | fuel_used 40
85 | EXPERIMENT 7 44
86 | support_material 41
87 | device 42
88 | anode_material 43
89 | power_density 45
90 | fuel_used 46
91 | working_temperature 47
92 | LINK experiment_variation 6 5
93 | LINK same_experiment 5 13
94 | LINK same_experiment 13 16
95 | LINK coreference 1 2
96 | LINK coreference 13 12
97 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6249295.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1055
2 | 2 0 1056 1268
3 | 3 0 1269 1472
4 | 4 1 1473 1729
5 | 5 1 1730 1895
6 | 6 1 1896 2081
7 | 7 0 2082 2485
8 | 8 0 2486 2664
9 | 9 0 2665 2820
10 | 10 0 2821 2974
11 | 11 0 2975 3111
12 | 12 0 3112 3370
13 | 13 0 3371 3498
14 | 14 0 3499 3622
15 | 15 0 3623 3779
16 | 16 0 3780 3946
17 | 17 0 3947 4159
18 | 18 0 4160 4488
19 | 19 0 4489 4659
20 | 20 0 4660 4821
21 | 21 0 4822 5026
22 | 22 0 5027 5292
23 | 23 0 5293 5369
24 | 24 0 5370 5600
25 | 25 0 5601 5735
26 | 26 0 5736 5859
27 | 27 0 5860 6027
28 | 28 0 6028 6168
29 | 29 0 6169 6290
30 | 30 0 6291 6416
31 | 31 0 6417 6519
32 | 32 0 6520 6714
33 | 33 0 6715 6872
34 | 34 0 6873 7079
35 | 35 1 7080 7303
36 | 36 0 7304 7414
37 | 37 0 7415 7556
38 | 38 0 7557 7646
39 | 39 0 7647 7762
40 | 40 0 7763 7892
41 | 41 0 7893 7953
42 | 42 0 7954 8056
43 | 43 0 8057 8218
44 | 44 0 8219 8458
45 | 45 0 8459 8600
46 | 46 0 8601 8774
47 | 47 0 8775 8917
48 | 48 0 8918 9078
49 | 49 0 9079 9255
50 | 50 0 9256 9381
51 | 51 0 9382 9501
52 | 52 0 9502 9614
53 | 53 0 9615 9723
54 | 54 0 9724 9852
55 | 55 0 9853 9922
56 | 56 0 9923 10138
57 | 57 0 10139 10209
58 | 58 0 10210 10284
59 | 59 0 10285 10441
60 | 60 0 10442 10621
61 | 61 0 10622 10803
62 | 62 0 10804 10807
63 | 63 0 10808 10945
64 | 64 0 10946 11038
65 | 65 0 11039 11042
66 | 66 0 11043 11155
67 | 67 0 11156 11363
68 | 68 0 11364 11564
69 | 69 0 11565 11626
70 | 70 0 11627 11694
71 | 71 1 11695 11856
72 | 72 1 11857 12063
73 | 73 0 12064 12169
74 | 74 0 12170 12328
75 | 75 0 12329 12459
76 | 76 1 12460 12574
77 | 77 0 12575 12697
78 | 78 0 12698 12702
79 | 79 0 12703 13059
80 | 80 0 13060 13139
81 | 81 1 13140 13294
82 | 82 1 13295 13564
83 | 83 0 13565 13725
84 | 84 0 13726 13840
85 | 85 0 13841 13934
86 | 86 0 13935 14106
87 | 87 1 14107 14338
88 | 88 0 14339 14540
89 | 89 0 14541 14674
90 | 90 0 14675 14797
91 | 91 1 14798 14965
92 | 92 0 14966 15116
93 | 93 0 15117 15254
94 | 94 0 15255 15350
95 | 95 0 15351 15465
96 | 96 0 15466 15687
97 | 97 0 15688 15834
98 | 98 1 15835 15897
99 | 99 1 15898 16068
100 | 100 1 16069 16216
101 | 101 0 16217 16347
102 | 102 0 16348 16484
103 | 103 0 16485 16616
104 | 104 0 16617 16740
105 | 105 0 16741 16824
106 | 106 0 16825 17126
107 | 107 0 17127 17254
108 | 108 0 17255 17347
109 | 109 0 17348 17402
110 | 110 0 17403 17634
111 | 111 0 17635 17773
112 | 112 0 17774 17902
113 | 113 0 17903 17951
114 | 114 0 17952 18089
115 | 115 0 18090 18235
116 | 116 0 18236 18408
117 | 117 0 18409 18529
118 | 118 0 18530 18673
119 | 119 0 18674 18781
120 | 120 0 18782 18969
121 | 121 0 18970 19078
122 | 122 0 19079 19296
123 | 123 0 19297 19490
124 | 124 0 19491 19604
125 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5457052.csv:
--------------------------------------------------------------------------------
1 | 1 1 0 1236
2 | 2 1 1237 1432
3 | 3 0 1433 1640
4 | 4 0 1641 1766
5 | 5 0 1767 2023
6 | 6 0 2024 2144
7 | 7 0 2145 2297
8 | 8 0 2298 2485
9 | 9 0 2486 2638
10 | 10 0 2639 2786
11 | 11 0 2787 2902
12 | 12 0 2903 3031
13 | 13 0 3032 3169
14 | 14 0 3170 3373
15 | 15 0 3374 3567
16 | 16 0 3568 3741
17 | 17 0 3742 3957
18 | 18 0 3958 4101
19 | 19 0 4102 4314
20 | 20 0 4315 4459
21 | 21 0 4460 4655
22 | 22 0 4656 4791
23 | 23 0 4792 4819
24 | 24 0 4820 4941
25 | 25 0 4942 4974
26 | 26 0 4975 5202
27 | 27 1 5203 5459
28 | 28 1 5460 5771
29 | 29 0 5772 5888
30 | 30 0 5889 6001
31 | 31 1 6002 6148
32 | 32 0 6149 6225
33 | 33 1 6226 6538
34 | 34 0 6539 6863
35 | 35 0 6864 6998
36 | 36 0 6999 7111
37 | 37 0 7112 7289
38 | 38 0 7290 7445
39 | 39 0 7446 7570
40 | 40 1 7571 7844
41 | 41 1 7845 8336
42 | 42 0 8337 8443
43 | 43 0 8444 8496
44 | 44 0 8497 8677
45 | 45 0 8678 8892
46 | 46 0 8893 9053
47 | 47 0 9054 9215
48 | 48 0 9216 9403
49 | 49 1 9404 9492
50 | 50 0 9493 9692
51 | 51 0 9693 9813
52 | 52 0 9814 9966
53 | 53 0 9967 10093
54 | 54 0 10094 10402
55 | 55 0 10403 10479
56 | 56 0 10480 10572
57 | 57 1 10573 10712
58 | 58 0 10713 10897
59 | 59 0 10898 10950
60 | 60 0 10951 11124
61 | 61 0 11125 11218
62 | 62 0 11219 11312
63 | 63 0 11313 11454
64 | 64 0 11455 11534
65 | 65 0 11535 11679
66 | 66 0 11680 11879
67 | 67 0 11880 12015
68 | 68 0 12016 12240
69 | 69 0 12241 12437
70 | 70 0 12438 12655
71 | 71 0 12656 12835
72 | 72 0 12836 12862
73 | 73 0 12863 12948
74 | 74 0 12949 13099
75 | 75 0 13100 13249
76 | 76 0 13250 13427
77 | 77 0 13428 13679
78 | 78 0 13680 13794
79 | 79 0 13795 13846
80 | 80 0 13847 14048
81 | 81 0 14049 14301
82 | 82 0 14302 14444
83 | 83 0 14445 14566
84 | 84 0 14567 14776
85 | 85 0 14777 14889
86 | 86 0 14890 15152
87 | 87 0 15153 15310
88 | 88 0 15311 15477
89 | 89 0 15478 15662
90 | 90 0 15663 15852
91 | 91 0 15853 15910
92 | 92 0 15911 16200
93 | 93 0 16201 16284
94 | 94 0 16285 16472
95 | 95 0 16473 16647
96 | 96 0 16648 16899
97 | 97 0 16900 17050
98 | 98 0 17051 17250
99 | 99 0 17251 17381
100 | 100 0 17382 17655
101 | 101 0 17656 17785
102 | 102 0 17786 17964
103 | 103 0 17965 18120
104 | 104 0 18121 18230
105 | 105 0 18231 18407
106 | 106 0 18408 18515
107 | 107 0 18516 18661
108 | 108 0 18662 18739
109 | 109 0 18740 18838
110 | 110 0 18839 18924
111 | 111 0 18925 19069
112 | 112 0 19070 19192
113 | 113 0 19193 19326
114 | 114 0 19327 19379
115 | 115 0 19380 19439
116 | 116 0 19440 19707
117 | 117 0 19708 19845
118 | 118 0 19846 20115
119 | 119 1 20116 20296
120 | 120 0 20297 20390
121 | 121 1 20391 20542
122 | 122 1 20543 20665
123 | 123 0 20666 20848
124 | 124 1 20849 20948
125 | 125 0 20949 21063
126 | 126 0 21064 21222
127 | 127 0 21223 21555
128 |
--------------------------------------------------------------------------------
/code/source/model/attentionLayer.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Heike Adel
4 | @author: Annemarie Friedrich
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU Affero General Public License as published
8 | by the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU Affero General Public License for more details.
15 | You should have received a copy of the GNU Affero General Public License
16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | The class in this file is adapted from
19 | https://github.com/yuhaozhang/tacred-relation,
20 | licensed under the Apache License 2.0
21 | cf. 3rd-party-licenses.txt file in the root directory of this source tree.
22 | """
23 |
24 | import torch
25 | from torch import nn
26 | import torch.nn.functional as F
27 |
28 | """attention layer for use in bilstm framework"""
29 |
class Attention(nn.Module):
    """
    Attention pooling over a (padded) sequence of vectors.

    The unnormalised attention score of every position is
        a = T' . tanh(Ux)
    where x is the input vector at that position. The scores are normalised
    with a softmax over the sequence dimension and used to compute a weighted
    average of the input vectors.
    """

    def __init__(self, input_size, attn_size):
        """
        :param input_size: size of the last dimension of the input vectors
        :param attn_size: size of the intermediate attention projection (U)
        """
        super(Attention, self).__init__()
        self.input_size = input_size
        self.attn_size = attn_size
        self.ulinear = nn.Linear(input_size, attn_size)
        self.tlinear = nn.Linear(attn_size, 1)
        self.init_weights()

    def init_weights(self):
        """Initialise U with small random values and T with zeros."""
        self.ulinear.weight.data.normal_(std=0.001)
        self.tlinear.weight.data.zero_()  # use zero to give uniform attention at the beginning

    def forward(self, x, x_mask):
        """
        :param x: float tensor, batch_size * seq_len * input_size
        :param x_mask: bool tensor, batch_size * seq_len;
                       contains true if masked, false if not masked
        :return: tuple (outputs, weights) where outputs is
                 batch_size * input_size (weighted average of x) and
                 weights is batch_size * seq_len (attention distribution)

        Note: a row of x_mask that masks every position leads to NaN weights
        for that row, since the softmax only sees -inf scores.
        """
        # nn.Linear acts on the last dimension, so no manual flattening is needed
        x_proj = self.ulinear(x)
        scores = self.tlinear(torch.tanh(x_proj)).squeeze(-1)

        # mask padding (out-of-place, so autograd keeps track of the operation)
        scores = scores.masked_fill(x_mask, float('-inf'))
        weights = F.softmax(scores, dim=1)
        # weighted average input vectors
        outputs = weights.unsqueeze(1).bmm(x).squeeze(1)

        return outputs, weights
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/code/source/model/encoder.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Heike Adel
4 | @author: Annemarie Friedrich
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU Affero General Public License as published
8 | by the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU Affero General Public License for more details.
15 | You should have received a copy of the GNU Affero General Public License
16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | """
18 |
19 | import torch.nn as nn
20 | import torch
21 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
22 | from model.embeddingLayer import StackedEmbeddings
23 |
24 |
class Encoder(nn.Module):
    """
    Sentence encoder: a stacked embedding layer followed by a bidirectional LSTM.
    """

    def __init__(self, weights_matrix, embedding_options, options, device):
        """
        :param weights_matrix: not read by this constructor (kept for the callers' signature)
        :param embedding_options: iterable of dictionaries, one per embedding;
                                  the keys 'embedding_type', 'weights' and 'name' are read here
        :param options: python dictionary with hyperparameters
                        ('hidden_size' and 'num_layers' are read here)
        :param device: device the pretrained embedding tensors are moved to
        """

        super().__init__()

        self.hidden_size = options['hidden_size']
        self.device = device

        # embedding types are collected for every configured embedding
        embedding_types = []
        for emb_opt in embedding_options:
            embedding_types.append(emb_opt['embedding_type'])

        # pretrained weights are collected for every embedding except "bert";
        # embeddings without weights contribute a None placeholder
        pretrained_embeddings = []
        for emb_opt in embedding_options:
            if emb_opt["name"] == "bert":
                continue
            if emb_opt['weights'] is None:
                pretrained_embeddings.append(None)
            else:
                as_tensor = torch.from_numpy(emb_opt['weights']).float()
                pretrained_embeddings.append(as_tensor.to(self.device))

        self.embeddingLayer = StackedEmbeddings(embedding_types, pretrained_embeddings, embedding_options, options, device)

        lstm_input_dim = self.embeddingLayer.get_output_dim()
        self.lstm = nn.LSTM(
            lstm_input_dim,
            self.hidden_size,
            num_layers=options['num_layers'],
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths):
        """
        Embed the input and run it through the BiLSTM.

        :param lengths: tensor with the sequence lengths; moved to the CPU before packing
        :return: tuple (outputs, output_lens) as produced by pad_packed_sequence
                 with batch_first=True
        """
        embedded = self.embeddingLayer(tokens, subtokens, bertTensor, bert_subtoken_mask, token_lengths)

        # pack so that the LSTM skips the padded positions (batch need not be sorted)
        cpu_lengths = lengths.cpu()
        packed_in = pack_padded_sequence(embedded, cpu_lengths, batch_first=True, enforce_sorted=False)

        # the final hidden and cell states are not used
        packed_out, (_h_n, _c_n) = self.lstm(packed_in)
        padded_out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)

        return padded_out, out_lengths
65 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5456866.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 752
2 | 2 1 753 973
3 | 3 1 974 1189
4 | 4 1 1190 1382
5 | 5 0 1383 1542
6 | 6 1 1543 1811
7 | 7 0 1812 1902
8 | 8 0 1903 2186
9 | 9 0 2187 2384
10 | 10 0 2385 2528
11 | 11 0 2529 2813
12 | 12 1 2814 2904
13 | 13 1 2905 3115
14 | 14 0 3116 3245
15 | 15 0 3246 3416
16 | 16 0 3417 3589
17 | 17 0 3590 3908
18 | 18 0 3909 4231
19 | 19 0 4232 4539
20 | 20 0 4540 4742
21 | 21 1 4743 4993
22 | 22 1 4994 5117
23 | 23 1 5118 5340
24 | 24 0 5341 5679
25 | 25 0 5680 5808
26 | 26 0 5809 6114
27 | 27 0 6115 6252
28 | 28 0 6253 6382
29 | 29 0 6383 6737
30 | 30 0 6738 6799
31 | 31 0 6800 7022
32 | 32 0 7023 7141
33 | 33 0 7142 7228
34 | 34 0 7229 7316
35 | 35 0 7317 7610
36 | 36 0 7611 7715
37 | 37 0 7716 7842
38 | 38 0 7843 8010
39 | 39 0 8011 8243
40 | 40 0 8244 8424
41 | 41 0 8425 8600
42 | 42 0 8601 8725
43 | 43 0 8726 8871
44 | 44 0 8872 9088
45 | 45 0 9089 9231
46 | 46 0 9232 9324
47 | 47 0 9325 9441
48 | 48 0 9442 9534
49 | 49 0 9535 9635
50 | 50 0 9636 9704
51 | 51 0 9705 9850
52 | 52 0 9851 10008
53 | 53 0 10009 10265
54 | 54 0 10266 10379
55 | 55 0 10380 10407
56 | 56 0 10408 10706
57 | 57 0 10707 10766
58 | 58 0 10767 10923
59 | 59 0 10924 11160
60 | 60 0 11161 11441
61 | 61 0 11442 11709
62 | 62 0 11710 12078
63 | 63 0 12079 12214
64 | 64 0 12215 12355
65 | 65 0 12356 12616
66 | 66 0 12617 12726
67 | 67 0 12727 12973
68 | 68 0 12974 13110
69 | 69 0 13111 13194
70 | 70 0 13195 13306
71 | 71 0 13307 13420
72 | 72 0 13421 13558
73 | 73 0 13559 13637
74 | 74 0 13638 13721
75 | 75 0 13722 13924
76 | 76 0 13925 14061
77 | 77 0 14062 14125
78 | 78 0 14126 14233
79 | 79 0 14234 14441
80 | 80 0 14442 14552
81 | 81 0 14553 14681
82 | 82 0 14682 14881
83 | 83 0 14882 15042
84 | 84 0 15043 15134
85 | 85 0 15135 15310
86 | 86 0 15311 15632
87 | 87 0 15633 15923
88 | 88 0 15924 16073
89 | 89 0 16074 16272
90 | 90 0 16273 16350
91 | 91 0 16351 16490
92 | 92 0 16491 16557
93 | 93 0 16558 16600
94 | 94 0 16601 16662
95 | 95 0 16663 16832
96 | 96 0 16833 16960
97 | 97 0 16961 17132
98 | 98 0 17133 17233
99 | 99 0 17234 17383
100 | 100 0 17384 17608
101 | 101 0 17609 17676
102 | 102 0 17677 17810
103 | 103 0 17811 18113
104 | 104 0 18114 18174
105 | 105 0 18175 18288
106 | 106 0 18289 18416
107 | 107 0 18417 18489
108 | 108 0 18490 18795
109 | 109 0 18796 18839
110 | 110 0 18840 19010
111 | 111 0 19011 19148
112 | 112 0 19149 19393
113 | 113 1 19394 19693
114 | 114 0 19694 19904
115 | 115 0 19905 20065
116 | 116 0 20066 20163
117 | 117 0 20164 20221
118 | 118 1 20222 20420
119 | 119 1 20421 20618
120 | 120 1 20619 20776
121 | 121 1 20777 21001
122 | 122 0 21002 21264
123 | 123 0 21265 21431
124 | 124 0 21432 21547
125 | 125 1 21548 21737
126 | 126 1 21738 21841
127 | 127 0 21842 22070
128 | 128 0 22071 22206
129 | 129 0 22207 22478
130 | 130 0 22479 22790
131 | 131 0 22791 22959
132 | 132 0 22960 23229
133 | 133 0 23230 23318
134 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC3564701.csv:
--------------------------------------------------------------------------------
1 | 1 1 0 1007
2 | 2 0 1008 1141
3 | 3 0 1142 1273
4 | 4 1 1274 1492
5 | 5 1 1493 1806
6 | 6 0 1807 1960
7 | 7 0 1961 2126
8 | 8 0 2127 2221
9 | 9 0 2222 2347
10 | 10 0 2348 2455
11 | 11 1 2456 2713
12 | 12 0 2714 2811
13 | 13 0 2812 2970
14 | 14 0 2971 3193
15 | 15 1 3194 3354
16 | 16 1 3355 3501
17 | 17 0 3502 3694
18 | 18 0 3695 3808
19 | 19 1 3809 4103
20 | 20 1 4104 4278
21 | 21 0 4279 4451
22 | 22 0 4452 4642
23 | 23 1 4643 4807
24 | 24 0 4808 5092
25 | 25 1 5093 5318
26 | 26 0 5319 5496
27 | 27 0 5497 5602
28 | 28 0 5603 5898
29 | 29 0 5899 6128
30 | 30 0 6129 6251
31 | 31 0 6252 6342
32 | 32 0 6343 6482
33 | 33 0 6483 6636
34 | 34 0 6637 6694
35 | 35 0 6695 6740
36 | 36 0 6741 6812
37 | 37 0 6813 6849
38 | 38 0 6850 6876
39 | 39 0 6877 6989
40 | 40 0 6990 7221
41 | 41 0 7222 7308
42 | 42 0 7309 7412
43 | 43 0 7413 7510
44 | 44 0 7511 7679
45 | 45 0 7680 7781
46 | 46 0 7782 7962
47 | 47 0 7963 8090
48 | 48 0 8091 8264
49 | 49 0 8265 8343
50 | 50 0 8344 8544
51 | 51 0 8545 8682
52 | 52 0 8683 8814
53 | 53 0 8815 8975
54 | 54 0 8976 9086
55 | 55 0 9087 9158
56 | 56 0 9159 9329
57 | 57 0 9330 9442
58 | 58 0 9443 9614
59 | 59 0 9615 9774
60 | 60 0 9775 9966
61 | 61 0 9967 10165
62 | 62 0 10166 10306
63 | 63 0 10307 10396
64 | 64 0 10397 10486
65 | 65 0 10487 10572
66 | 66 0 10573 10623
67 | 67 0 10624 10752
68 | 68 0 10753 10925
69 | 69 0 10926 11139
70 | 70 0 11140 11238
71 | 71 0 11239 11390
72 | 72 0 11391 11686
73 | 73 0 11687 11760
74 | 74 0 11761 11841
75 | 75 0 11842 11938
76 | 76 0 11939 12012
77 | 77 0 12013 12120
78 | 78 0 12121 12217
79 | 79 0 12218 12532
80 | 80 0 12533 12690
81 | 81 0 12691 12842
82 | 82 0 12843 12918
83 | 83 0 12919 13108
84 | 84 0 13109 13287
85 | 85 0 13288 13335
86 | 86 0 13336 13394
87 | 87 0 13395 13524
88 | 88 1 13525 13781
89 | 89 0 13782 13894
90 | 90 1 13895 14006
91 | 91 0 14007 14125
92 | 92 0 14126 14238
93 | 93 1 14239 14357
94 | 94 0 14358 14555
95 | 95 1 14556 14803
96 | 96 1 14804 14952
97 | 97 0 14953 15193
98 | 98 0 15194 15275
99 | 99 0 15276 15463
100 | 100 1 15464 15717
101 | 101 0 15718 15774
102 | 102 0 15775 15888
103 | 103 0 15889 15978
104 | 104 0 15979 16034
105 | 105 0 16035 16169
106 | 106 1 16170 16335
107 | 107 0 16336 16451
108 | 108 0 16452 16572
109 | 109 0 16573 16672
110 | 110 0 16673 16955
111 | 111 0 16956 17128
112 | 112 0 17129 17234
113 | 113 0 17235 17369
114 | 114 0 17370 17471
115 | 115 0 17472 17652
116 | 116 0 17653 17767
117 | 117 0 17768 17951
118 | 118 1 17952 18128
119 | 119 1 18129 18281
120 | 120 1 18282 18493
121 | 121 0 18494 18591
122 | 122 0 18592 18762
123 | 123 0 18763 19046
124 | 124 0 19047 19125
125 | 125 0 19126 19243
126 | 126 0 19244 19297
127 | 127 0 19298 19372
128 | 128 0 19373 19432
129 | 129 0 19433 19495
130 | 130 0 19496 19547
131 | 131 0 19548 19680
132 | 132 0 19681 19801
133 | 133 0 19802 19890
134 | 134 0 19891 19991
135 |
--------------------------------------------------------------------------------
/code/source/model/sentenceClassifier.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Heike Adel
4 | @author: Annemarie Friedrich
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU Affero General Public License as published
8 | by the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU Affero General Public License for more details.
15 | You should have received a copy of the GNU Affero General Public License
16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | """
18 |
19 |
20 | import torch.nn as nn
21 | import torch
22 | from model.attentionLayer import Attention
23 | from model.encoder import Encoder
24 |
25 |
class SentenceClassifier(nn.Module):
    """
    bilstm model for experiment sentence classification

    The input is passed through an ``Encoder``, the encoder outputs are
    combined by an ``Attention`` layer (padding positions are masked out),
    and the result is batch-normalized and projected to ``num_labels`` scores
    by a linear layer.
    """
    def __init__(self, weights_matrix, embedding_options, options, device):
        """

        :param weights_matrix: numpy matrix with word embeddings
        :param embedding_options: embedding configuration, passed on unchanged to the Encoder
        :param options: python dictionary with hyperparameters; this class reads the keys
                        'hidden_size', 'num_labels' and 'attention_size'
        :param device: torch device on which the padding mask is placed in forward()
        """

        super().__init__()

        self.hidden_size = options['hidden_size']
        self.num_labels = options['num_labels']
        self.device = device
        self.options = options

        self.encoder = Encoder(weights_matrix, embedding_options, options, device)

        # The attention, batch-norm and linear layers are all sized for
        # 2 * hidden_size features (presumably forward + backward LSTM states
        # coming out of the encoder -- confirm against Encoder).
        representation_size = 2 * self.hidden_size

        self.attention_size = options['attention_size']
        self.attention = Attention(self.hidden_size*2, self.attention_size)
        self.batchnorm3 = nn.BatchNorm1d(self.hidden_size*2)

        self.linear = nn.Linear(representation_size, self.num_labels)

    def forward(self, tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths, return_weights=False):
        """
        Compute the label scores for a batch of sentences.

        The first six arguments are handed to the encoder unchanged.
        ``lengths`` is additionally used here to build the padding mask, so it
        must hold one integer length per sentence of the batch.

        :param return_weights: if True, also return the attention weights
        :return: output of the final linear layer, or the tuple
                 (output, attention_weights) if return_weights is set
        """
        outputs, _ = self.encoder(tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths)

        # Padding mask: True marks positions at or beyond the sentence length.
        # A single broadcast comparison replaces the per-sentence Python lists
        # (and the per-sentence .item() calls) used previously.
        max_length = outputs.shape[1]
        lengths_col = torch.as_tensor(lengths).reshape(-1, 1)
        positions = torch.arange(max_length, device=lengths_col.device)
        masks = (positions >= lengths_col).to(self.device)

        hidden, attention_weights = self.attention(outputs, masks)
        hidden = self.batchnorm3(hidden)

        output = self.linear(hidden)

        if return_weights:
            return output, attention_weights
        return output
77 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC6632008.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 EXPERIMENT:current_exp 1 1174 1182
2 | SPAN 2 EXPERIMENT:current_exp 1 1187 1195
3 | SPAN 3 DEVICE 1 1225 1246
4 | SPAN 4 DEVICE 1 1248 1252
5 | SPAN 5 MATERIAL 1 1263 1281
6 | SPAN 6 EXPERIMENT:current_exp 2 18 26
7 | SPAN 7 MATERIAL 2 39 65
8 | SPAN 8 MATERIAL 2 67 72
9 | SPAN 9 VALUE 5 34 45
10 | SPAN 10 EXPERIMENT:current_exp 5 49 57
11 | SPAN 11 VALUE 5 61 67
12 | SPAN 12 MATERIAL 6 40 44
13 | SPAN 13 EXPERIMENT:current_exp 6 57 68
14 | SPAN 14 EXPERIMENT:current_exp 54 28 37
15 | SPAN 15 VALUE 54 83 94
16 | SPAN 16 VALUE 54 98 101
17 | SPAN 17 VALUE 54 106 112
18 | SPAN 18 VALUE 54 117 121
19 | SPAN 19 MATERIAL 85 57 83
20 | SPAN 20 MATERIAL 85 100 126
21 | SPAN 21 MATERIAL 85 128 132
22 | SPAN 22 MATERIAL 89 135 172
23 | SPAN 23 EXPERIMENT:current_exp 102 32 44
24 | SPAN 24 VALUE 102 102 115
25 | SPAN 25 MATERIAL 103 4 6
26 | SPAN 26 MATERIAL 103 7 33
27 | SPAN 27 MATERIAL 103 34 38
28 | SPAN 28 MATERIAL 103 39 65
29 | SPAN 29 MATERIAL 116 23 49
30 | SPAN 30 MATERIAL 118 19 22
31 | SPAN 31 EXPERIMENT:current_exp 118 44 46
32 | SPAN 32 VALUE 118 52 63
33 | SPAN 33 EXPERIMENT:current_exp 118 87 89
34 | SPAN 34 VALUE 118 90 101
35 | SPAN 35 MATERIAL 118 120 128
36 | SPAN 36 EXPERIMENT:current_exp 118 150 152
37 | SPAN 37 VALUE 118 153 164
38 | SPAN 38 EXPERIMENT:current_exp 118 187 189
39 | SPAN 39 VALUE 118 190 201
40 | SPAN 40 VALUE 118 211 217
41 | SPAN 41 VALUE 121 13 24
42 | SPAN 42 EXPERIMENT:current_exp 121 25 35
43 | SPAN 43 VALUE 121 52 78
44 | SPAN 44 VALUE 121 91 102
45 | SPAN 45 DEVICE 122 77 81
46 | SPAN 46 MATERIAL 123 31 57
47 | SPAN 47 DEVICE 127 29 33
48 | SPAN 48 VALUE 127 57 60
49 | SPAN 49 EXPERIMENT:current_exp 127 61 69
50 | SPAN 50 VALUE 127 105 116
51 | SPAN 51 VALUE 127 123 127
52 | SPAN 52 VALUE 127 132 136
53 | SPAN 53 VALUE 127 140 146
54 | SPAN 54 VALUE 127 151 157
55 | EXPERIMENT 1 2
56 | device 3
57 | cathode_material 5
58 | EXPERIMENT 2 6
59 | cathode_material 7
60 | EXPERIMENT 3 10
61 | conductivity 9
62 | working_temperature 11
63 | EXPERIMENT 4 13
64 | electrolyte_material 12
65 | EXPERIMENT 5 14
66 | current_density 15
67 | working_temperature 16
68 | working_temperature 17
69 | time_of_operation 18
70 | EXPERIMENT 6 23
71 | working_temperature 24
72 | EXPERIMENT 7 31
73 | cathode_material 30
74 | resistance 32
75 | EXPERIMENT 8 33
76 | power_density 34
77 | EXPERIMENT 9 36
78 | cathode_material 35
79 | resistance 37
80 | EXPERIMENT 10 38
81 | power_density 39
82 | working_temperature 40
83 | EXPERIMENT 11 42
84 | resistance 41
85 | resistance 43
86 | resistance 44
87 | EXPERIMENT 12 49
88 | device 47
89 | time_of_operation 48
90 | time_of_operation 51
91 | time_of_operation 52
92 | working_temperature 53
93 | working_temperature 54
94 | LINK same_experiment 1 2
95 | LINK same_experiment 2 6
96 | LINK experiment_variation 36 33
97 | LINK same_experiment 31 33
98 | LINK same_experiment 36 38
99 | LINK coreference 3 4
100 | LINK coreference 7 8
101 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6632008.csv:
--------------------------------------------------------------------------------
1 | 1 1 0 1293
2 | 2 1 1294 1480
3 | 3 0 1481 1633
4 | 4 0 1634 1749
5 | 5 1 1750 1818
6 | 6 1 1819 1973
7 | 7 0 1974 2111
8 | 8 0 2112 2168
9 | 9 0 2169 2413
10 | 10 0 2414 2655
11 | 11 0 2656 2799
12 | 12 0 2800 3006
13 | 13 0 3007 3161
14 | 14 0 3162 3364
15 | 15 0 3365 3627
16 | 16 0 3628 3707
17 | 17 0 3708 3948
18 | 18 0 3949 4091
19 | 19 0 4092 4256
20 | 20 0 4257 4499
21 | 21 0 4500 4749
22 | 22 0 4750 4898
23 | 23 0 4899 5148
24 | 24 0 5149 5290
25 | 25 0 5291 5578
26 | 26 0 5579 5596
27 | 27 0 5597 5738
28 | 28 0 5739 5977
29 | 29 0 5978 6133
30 | 30 0 6134 6199
31 | 31 0 6200 6296
32 | 32 0 6297 6434
33 | 33 0 6435 6642
34 | 34 0 6643 6702
35 | 35 0 6703 6818
36 | 36 0 6819 7020
37 | 37 0 7021 7092
38 | 38 0 7093 7167
39 | 39 0 7168 7275
40 | 40 0 7276 7363
41 | 41 0 7364 7628
42 | 42 0 7629 7715
43 | 43 0 7716 8084
44 | 44 0 8085 8456
45 | 45 0 8457 8880
46 | 46 0 8881 8989
47 | 47 0 8990 9077
48 | 48 0 9078 9239
49 | 49 0 9240 9390
50 | 50 0 9391 9602
51 | 51 0 9603 9740
52 | 52 0 9741 9904
53 | 53 0 9905 10006
54 | 54 1 10007 10129
55 | 55 0 10130 10231
56 | 56 0 10232 10324
57 | 57 0 10325 10456
58 | 58 0 10457 10484
59 | 59 0 10485 10598
60 | 60 0 10599 10716
61 | 61 0 10717 10921
62 | 62 0 10922 11151
63 | 63 0 11152 11298
64 | 64 0 11299 11527
65 | 65 0 11528 11782
66 | 66 0 11783 11853
67 | 67 0 11854 12003
68 | 68 0 12004 12130
69 | 69 0 12131 12240
70 | 70 0 12241 12403
71 | 71 0 12404 12542
72 | 72 0 12543 12704
73 | 73 0 12705 12828
74 | 74 0 12829 13035
75 | 75 0 13036 13114
76 | 76 0 13115 13161
77 | 77 0 13162 13352
78 | 78 0 13353 13606
79 | 79 0 13607 13816
80 | 80 0 13817 13945
81 | 81 0 13946 14152
82 | 82 0 14153 14226
83 | 83 0 14227 14448
84 | 84 0 14449 14662
85 | 85 0 14663 14947
86 | 86 0 14948 15093
87 | 87 0 15094 15166
88 | 88 0 15167 15319
89 | 89 0 15320 15578
90 | 90 0 15579 15647
91 | 91 0 15648 15782
92 | 92 0 15783 15849
93 | 93 0 15850 15949
94 | 94 0 15950 16168
95 | 95 0 16169 16460
96 | 96 0 16461 16987
97 | 97 0 16988 17074
98 | 98 0 17075 17266
99 | 99 0 17267 17403
100 | 100 0 17404 17528
101 | 101 0 17529 17694
102 | 102 1 17695 17811
103 | 103 0 17812 17941
104 | 104 0 17942 18094
105 | 105 0 18095 18486
106 | 106 0 18487 18811
107 | 107 0 18812 18997
108 | 108 0 18998 19143
109 | 109 0 19144 19286
110 | 110 0 19287 19368
111 | 111 0 19369 19586
112 | 112 0 19587 19731
113 | 113 0 19732 19780
114 | 114 0 19781 19933
115 | 115 0 19934 20034
116 | 116 0 20035 20129
117 | 117 0 20130 20281
118 | 118 1 20282 20500
119 | 119 0 20501 20606
120 | 120 0 20607 20763
121 | 121 1 20764 21094
122 | 122 0 21095 21215
123 | 123 0 21216 21373
124 | 124 0 21374 21586
125 | 125 0 21587 21747
126 | 126 0 21748 21839
127 | 127 1 21840 21998
128 | 128 0 21999 22295
129 | 129 0 22296 22405
130 | 130 0 22406 22511
131 | 131 0 22512 22594
132 | 132 0 22595 22754
133 | 133 0 22755 22833
134 | 134 0 22834 22974
135 | 135 0 22975 23183
136 | 136 0 23184 23285
137 | 137 0 23286 23380
138 | 138 0 23381 23459
139 | 139 0 23460 23610
140 | 140 0 23611 23759
141 | 141 0 23760 23902
142 |
--------------------------------------------------------------------------------
/code/source/dataHandling/sampling.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Annemarie Friedrich
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU Affero General Public License as published
7 | by the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU Affero General Public License for more details.
14 | You should have received a copy of the GNU Affero General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | """
17 |
18 | from torch.utils.data import Sampler
19 | import random
20 | from copy import deepcopy
21 | from collections import defaultdict
22 |
23 |
class WeightedDownSampler(Sampler):
    """
    Sampler that randomly drops a fixed fraction of the instances of each class.

    The class membership of every instance is collected once in the constructor;
    a new random subset is drawn every time an iterator is requested.
    """

    def __init__(self, dataset, class_idx, class_weights, class_key=None):
        """
        :param dataset: The data set to be sampled from. Each instance must support
            ``inst[field].item()`` for the label field (e.g. a tuple/dict of tensors).
        :param class_idx: position of the class label within an instance (used unless class_key is given)
        :param class_weights: dictionary with downsampling weights for the classes, 0.0 means "keep all", 0.3 means
            "keep 70% of this class". Classes without an entry are kept completely.
        :param class_key: if given, use this instead of class index (not tested yet!!) --> for pytorch_all data structures

        Instantiate only once (when creating the Dataset instance).
        In DataLoader, call only the iterator: when instantiating the iterator, the sampling really happens.
        """
        # collect information about where instances of each class are in dataset
        self.class_weights = class_weights
        self.indices_by_class = defaultdict(list)
        label_field = class_idx if class_key is None else class_key
        for i, inst in enumerate(dataset):
            label = int(inst[label_field].item())  # assume integer coding for classes
            self.indices_by_class[label].append(i)
        # The reported length uses the same per-class truncation as __iter__,
        # so len(sampler) always equals the number of indices actually yielded.
        self.num_samples = sum(self._keep_count(c) for c in self.indices_by_class)

    def _keep_count(self, label):
        """Return how many instances of class ``label`` survive downsampling."""
        try:
            weight = self.class_weights[label]
        except KeyError:
            # no weight configured for this class: do not downsample it
            weight = 0.0
        return int(len(self.indices_by_class[label]) * (1 - weight))

    def __iter__(self):
        """
        :return: an iterator over the indices of the items, which will be used sequentially by DataLoader
        to split the data into batches.
        """
        # Now actually downsample the data
        indices = []
        for label, members in self.indices_by_class.items():
            # shuffle a copy so the stored per-class order is left untouched
            shuffled = list(members)
            random.shuffle(shuffled)
            indices += shuffled[:self._keep_count(label)]
        random.shuffle(indices)
        return iter(indices)

    def __len__(self):
        return self.num_samples
65 |
66 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4772004.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 794
2 | 2 1 795 967
3 | 3 0 968 1101
4 | 4 1 1102 1288
5 | 5 0 1289 1404
6 | 6 0 1405 1651
7 | 7 0 1652 1820
8 | 8 0 1821 1917
9 | 9 0 1918 1980
10 | 10 0 1981 2160
11 | 11 1 2161 2310
12 | 12 0 2311 2428
13 | 13 0 2429 2618
14 | 14 0 2619 2780
15 | 15 1 2781 3045
16 | 16 0 3046 3112
17 | 17 0 3113 3262
18 | 18 0 3263 3343
19 | 19 0 3344 3417
20 | 20 0 3418 3636
21 | 21 0 3637 3736
22 | 22 1 3737 4014
23 | 23 0 4015 4129
24 | 24 1 4130 4333
25 | 25 0 4334 4589
26 | 26 1 4590 4759
27 | 27 0 4760 4907
28 | 28 0 4908 5014
29 | 29 0 5015 5176
30 | 30 0 5177 5274
31 | 31 0 5275 5397
32 | 32 0 5398 5584
33 | 33 0 5585 5760
34 | 34 0 5761 5863
35 | 35 0 5864 5954
36 | 36 0 5955 6044
37 | 37 0 6045 6180
38 | 38 0 6181 6399
39 | 39 0 6400 6512
40 | 40 0 6513 6667
41 | 41 0 6668 6672
42 | 42 0 6673 6843
43 | 43 0 6844 6848
44 | 44 0 6849 6960
45 | 45 0 6961 7188
46 | 46 0 7189 7345
47 | 47 0 7346 7350
48 | 48 0 7351 7453
49 | 49 0 7454 7649
50 | 50 0 7650 7800
51 | 51 0 7801 8004
52 | 52 0 8005 8262
53 | 53 0 8263 8344
54 | 54 0 8345 8444
55 | 55 0 8445 8547
56 | 56 0 8548 8658
57 | 57 0 8659 8910
58 | 58 0 8911 8981
59 | 59 1 8982 9087
60 | 60 0 9088 9140
61 | 61 0 9141 9219
62 | 62 0 9220 9334
63 | 63 0 9335 9460
64 | 64 0 9461 9592
65 | 65 0 9593 9719
66 | 66 0 9720 9893
67 | 67 0 9894 10060
68 | 68 0 10061 10183
69 | 69 0 10184 10259
70 | 70 0 10260 10319
71 | 71 0 10320 10407
72 | 72 0 10408 10621
73 | 73 1 10622 10753
74 | 74 0 10754 10876
75 | 75 0 10877 11119
76 | 76 0 11120 11332
77 | 77 1 11333 11476
78 | 78 1 11477 11582
79 | 79 0 11583 11753
80 | 80 0 11754 11876
81 | 81 1 11877 12075
82 | 82 0 12076 12323
83 | 83 0 12324 12535
84 | 84 1 12536 12714
85 | 85 0 12715 12927
86 | 86 0 12928 13079
87 | 87 1 13080 13300
88 | 88 1 13301 13433
89 | 89 0 13434 13618
90 | 90 0 13619 13864
91 | 91 0 13865 13945
92 | 92 0 13946 14071
93 | 93 0 14072 14160
94 | 94 0 14161 14334
95 | 95 0 14335 14574
96 | 96 0 14575 14675
97 | 97 0 14676 14935
98 | 98 1 14936 15142
99 | 99 0 15143 15341
100 | 100 0 15342 15439
101 | 101 1 15440 15533
102 | 102 0 15534 15694
103 | 103 0 15695 15914
104 | 104 0 15915 15975
105 | 105 0 15976 16078
106 | 106 0 16079 16187
107 | 107 0 16188 16444
108 | 108 0 16445 16602
109 | 109 0 16603 17046
110 | 110 0 17047 17142
111 | 111 0 17143 17250
112 | 112 0 17251 17408
113 | 113 0 17409 17511
114 | 114 0 17512 17669
115 | 115 0 17670 17786
116 | 116 0 17787 17890
117 | 117 0 17891 18032
118 | 118 0 18033 18185
119 | 119 0 18186 18285
120 | 120 0 18286 18289
121 | 121 0 18290 18407
122 | 122 0 18408 18457
123 | 123 0 18458 18575
124 | 124 0 18576 18658
125 | 125 0 18659 18881
126 | 126 0 18882 18993
127 | 127 0 18994 19144
128 | 128 1 19145 19439
129 | 129 0 19440 19627
130 | 130 0 19628 19716
131 | 131 0 19717 19899
132 | 132 0 19900 20022
133 | 133 0 20023 20188
134 | 134 0 20189 20346
135 | 135 0 20347 20529
136 | 136 0 20530 20657
137 | 137 0 20658 20739
138 | 138 1 20740 20854
139 | 139 0 20855 20955
140 | 140 0 20956 21109
141 | 141 1 21110 21256
142 | 142 0 21257 21396
143 | 143 0 21397 21513
144 | 144 0 21514 21570
145 | 145 0 21571 21580
146 | 146 0 21581 21699
147 | 147 0 21700 21704
148 | 148 0 21705 21750
149 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5331335.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1113
2 | 2 0 1114 1295
3 | 3 0 1296 1511
4 | 4 1 1512 1944
5 | 5 0 1945 2082
6 | 6 1 2083 2254
7 | 7 0 2255 2474
8 | 8 1 2475 2829
9 | 9 0 2830 2998
10 | 10 1 2999 3184
11 | 11 1 3185 3396
12 | 12 0 3397 3560
13 | 13 1 3561 3737
14 | 14 0 3738 3843
15 | 15 0 3844 4013
16 | 16 0 4014 4195
17 | 17 0 4196 4418
18 | 18 0 4419 4682
19 | 19 0 4683 4921
20 | 20 0 4922 5059
21 | 21 0 5060 5225
22 | 22 0 5226 5500
23 | 23 1 5501 5752
24 | 24 0 5753 5905
25 | 25 1 5906 6143
26 | 26 1 6144 6290
27 | 27 0 6291 6403
28 | 28 0 6404 6711
29 | 29 0 6712 7003
30 | 30 0 7004 7308
31 | 31 0 7309 7487
32 | 32 0 7488 7580
33 | 33 0 7581 7822
34 | 34 0 7823 7921
35 | 35 1 7922 8101
36 | 36 0 8102 8466
37 | 37 0 8467 8727
38 | 38 0 8728 8890
39 | 39 0 8891 9109
40 | 40 0 9110 9191
41 | 41 0 9192 9356
42 | 42 0 9357 9433
43 | 43 0 9434 9611
44 | 44 0 9612 9846
45 | 45 0 9847 9969
46 | 46 0 9970 10080
47 | 47 0 10081 10252
48 | 48 0 10253 10590
49 | 49 0 10591 10716
50 | 50 0 10717 10887
51 | 51 0 10888 10978
52 | 52 0 10979 11249
53 | 53 0 11250 11355
54 | 54 1 11356 11532
55 | 55 0 11533 11749
56 | 56 1 11750 11930
57 | 57 1 11931 12123
58 | 58 1 12124 12305
59 | 59 1 12306 12504
60 | 60 0 12505 12582
61 | 61 0 12583 12701
62 | 62 0 12702 12902
63 | 63 0 12903 13084
64 | 64 0 13085 13279
65 | 65 0 13280 13477
66 | 66 1 13478 13590
67 | 67 0 13591 13754
68 | 68 0 13755 13925
69 | 69 0 13926 14068
70 | 70 0 14069 14231
71 | 71 0 14232 14433
72 | 72 0 14434 14750
73 | 73 0 14751 14907
74 | 74 0 14908 15140
75 | 75 0 15141 15244
76 | 76 0 15245 15334
77 | 77 0 15335 15435
78 | 78 0 15436 15558
79 | 79 0 15559 15785
80 | 80 0 15786 15975
81 | 81 0 15976 16148
82 | 82 0 16149 16470
83 | 83 0 16471 16619
84 | 84 0 16620 16880
85 | 85 0 16881 17082
86 | 86 0 17083 17374
87 | 87 0 17375 17667
88 | 88 0 17668 17834
89 | 89 0 17835 18004
90 | 90 0 18005 18143
91 | 91 0 18144 18237
92 | 92 0 18238 18352
93 | 93 0 18353 18599
94 | 94 1 18600 18765
95 | 95 1 18766 19013
96 | 96 1 19014 19235
97 | 97 0 19236 19348
98 | 98 0 19349 19475
99 | 99 0 19476 19626
100 | 100 0 19627 19716
101 | 101 0 19717 19930
102 | 102 0 19931 20093
103 | 103 0 20094 20275
104 | 104 0 20276 20403
105 | 105 0 20404 20541
106 | 106 0 20542 20719
107 | 107 0 20720 20804
108 | 108 0 20805 20948
109 | 109 0 20949 21104
110 | 110 0 21105 21486
111 | 111 1 21487 21672
112 | 112 0 21673 21852
113 | 113 0 21853 21987
114 | 114 0 21988 22188
115 | 115 0 22189 22379
116 | 116 1 22380 22592
117 | 117 1 22593 22818
118 | 118 0 22819 23109
119 | 119 0 23110 23295
120 | 120 0 23296 23431
121 | 121 0 23432 23535
122 | 122 0 23536 23694
123 | 123 0 23695 23855
124 | 124 0 23856 23978
125 | 125 0 23979 24094
126 | 126 0 24095 24203
127 | 127 0 24204 24342
128 | 128 0 24343 24490
129 | 129 0 24491 24576
130 | 130 0 24577 24802
131 | 131 0 24803 24964
132 | 132 0 24965 25150
133 | 133 0 25151 25295
134 | 134 0 25296 25413
135 | 135 0 25414 25492
136 | 136 0 25493 25739
137 | 137 0 25740 25863
138 | 138 0 25864 26015
139 | 139 0 26016 26094
140 | 140 0 26095 26263
141 | 141 0 26264 26332
142 | 142 0 26333 26396
143 | 143 0 26397 26503
144 | 144 0 26504 26508
145 | 145 0 26509 26558
146 | 146 0 26559 26695
147 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5700654.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 924
2 | 2 0 925 927
3 | 3 0 928 1384
4 | 4 0 1385 1387
5 | 5 0 1388 1659
6 | 6 0 1660 1830
7 | 7 0 1831 1975
8 | 8 0 1976 2090
9 | 9 0 2091 2248
10 | 10 0 2249 2442
11 | 11 0 2443 2678
12 | 12 0 2679 2858
13 | 13 0 2859 3536
14 | 14 0 3537 3546
15 | 15 0 3547 3556
16 | 16 0 3557 3564
17 | 17 0 3565 3572
18 | 18 0 3573 3583
19 | 19 0 3584 3586
20 | 20 0 3587 3589
21 | 21 0 3590 3601
22 | 22 0 3602 3611
23 | 23 0 3612 3614
24 | 24 0 3615 3626
25 | 25 0 3627 3827
26 | 26 0 3828 4207
27 | 27 0 4208 4527
28 | 28 0 4528 4650
29 | 29 0 4651 5088
30 | 30 0 5089 5419
31 | 31 0 5420 5679
32 | 32 0 5680 5915
33 | 33 0 5916 6191
34 | 34 0 6192 6668
35 | 35 0 6669 7085
36 | 36 0 7086 7290
37 | 37 0 7291 7631
38 | 38 0 7632 8408
39 | 39 0 8409 8535
40 | 40 0 8536 8747
41 | 41 0 8748 8988
42 | 42 0 8989 9131
43 | 43 0 9132 9247
44 | 44 0 9248 9464
45 | 45 0 9465 9592
46 | 46 0 9593 9749
47 | 47 0 9750 9940
48 | 48 0 9941 10096
49 | 49 0 10097 10215
50 | 50 0 10216 10392
51 | 51 0 10393 10553
52 | 52 0 10554 10673
53 | 53 0 10674 10766
54 | 54 0 10767 10989
55 | 55 0 10990 11156
56 | 56 0 11157 11249
57 | 57 0 11250 11545
58 | 58 0 11546 11859
59 | 59 0 11860 11947
60 | 60 0 11948 12016
61 | 61 0 12017 12174
62 | 62 0 12175 12476
63 | 63 0 12477 12679
64 | 64 0 12680 12826
65 | 65 0 12827 12968
66 | 66 0 12969 13125
67 | 67 0 13126 13272
68 | 68 0 13273 13435
69 | 69 0 13436 13606
70 | 70 0 13607 13832
71 | 71 0 13833 13993
72 | 72 0 13994 14193
73 | 73 0 14194 14426
74 | 74 0 14427 14795
75 | 75 0 14796 14975
76 | 76 0 14976 15226
77 | 77 0 15227 15330
78 | 78 0 15331 15427
79 | 79 0 15428 15594
80 | 80 0 15595 15670
81 | 81 0 15671 15888
82 | 82 0 15889 15948
83 | 83 0 15949 16102
84 | 84 0 16103 16319
85 | 85 0 16320 16520
86 | 86 0 16521 16654
87 | 87 0 16655 16846
88 | 88 1 16847 17141
89 | 89 0 17142 17245
90 | 90 1 17246 17613
91 | 91 0 17614 17708
92 | 92 0 17709 17923
93 | 93 0 17924 18070
94 | 94 1 18071 18289
95 | 95 0 18290 18584
96 | 96 1 18585 18904
97 | 97 0 18905 19132
98 | 98 0 19133 19695
99 | 99 0 19696 20218
100 | 100 0 20219 20339
101 | 101 0 20340 20472
102 | 102 0 20473 20594
103 | 103 1 20595 20783
104 | 104 0 20784 20891
105 | 105 0 20892 21104
106 | 106 0 21105 21320
107 | 107 0 21321 21478
108 | 108 1 21479 21725
109 | 109 0 21726 21866
110 | 110 0 21867 22325
111 | 111 0 22326 22501
112 | 112 0 22502 22705
113 | 113 0 22706 22846
114 | 114 0 22847 23004
115 | 115 0 23005 23118
116 | 116 0 23119 23294
117 | 117 0 23295 23538
118 | 118 0 23539 23684
119 | 119 0 23685 23847
120 | 120 0 23848 24178
121 | 121 0 24179 24321
122 | 122 0 24322 24429
123 | 123 0 24430 24600
124 | 124 0 24601 24760
125 | 125 0 24761 24876
126 | 126 0 24877 25115
127 | 127 0 25116 25539
128 | 128 0 25540 25974
129 | 129 0 25975 26076
130 | 130 0 26077 26232
131 | 131 0 26233 26375
132 | 132 0 26376 26509
133 | 133 0 26510 26598
134 | 134 0 26599 26769
135 | 135 0 26770 26946
136 | 136 0 26947 27149
137 | 137 0 27150 27221
138 | 138 0 27222 27307
139 | 139 0 27308 27411
140 | 140 0 27412 27890
141 | 141 0 27891 28050
142 | 142 0 28051 28149
143 | 143 0 28150 28337
144 | 144 0 28338 28544
145 | 145 0 28545 28702
146 | 146 0 28703 28788
147 | 147 0 28789 28854
148 | 148 0 28855 28928
149 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4992832.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 788
2 | 2 1 789 1009
3 | 3 0 1010 1146
4 | 4 0 1147 1328
5 | 5 1 1329 1570
6 | 6 0 1571 1708
7 | 7 1 1709 1854
8 | 8 1 1855 2035
9 | 9 1 2036 2167
10 | 10 0 2168 2334
11 | 11 0 2335 2573
12 | 12 0 2574 2646
13 | 13 0 2647 2772
14 | 14 1 2773 3223
15 | 15 0 3224 3301
16 | 16 0 3302 3396
17 | 17 0 3397 3515
18 | 18 0 3516 3622
19 | 19 0 3623 3708
20 | 20 1 3709 3868
21 | 21 0 3869 3994
22 | 22 0 3995 4091
23 | 23 1 4092 4219
24 | 24 1 4220 4341
25 | 25 1 4342 4534
26 | 26 1 4535 4614
27 | 27 1 4615 4800
28 | 28 0 4801 4939
29 | 29 1 4940 5081
30 | 30 0 5082 5278
31 | 31 1 5279 5457
32 | 32 0 5458 5680
33 | 33 0 5681 5753
34 | 34 0 5754 5960
35 | 35 0 5961 6157
36 | 36 0 6158 6337
37 | 37 0 6338 6562
38 | 38 0 6563 6662
39 | 39 0 6663 6768
40 | 40 0 6769 6994
41 | 41 0 6995 7142
42 | 42 0 7143 7253
43 | 43 0 7254 7506
44 | 44 0 7507 7685
45 | 45 0 7686 8015
46 | 46 0 8016 8239
47 | 47 0 8240 8490
48 | 48 0 8491 8735
49 | 49 0 8736 8929
50 | 50 0 8930 9158
51 | 51 0 9159 9375
52 | 52 0 9376 9669
53 | 53 0 9670 9884
54 | 54 0 9885 10065
55 | 55 0 10066 10242
56 | 56 0 10243 10461
57 | 57 0 10462 10583
58 | 58 0 10584 10801
59 | 59 0 10802 10950
60 | 60 0 10951 11114
61 | 61 0 11115 11216
62 | 62 0 11217 11419
63 | 63 0 11420 11540
64 | 64 0 11541 11651
65 | 65 0 11652 11760
66 | 66 0 11761 12010
67 | 67 0 12011 12221
68 | 68 0 12222 12313
69 | 69 0 12314 12580
70 | 70 0 12581 12633
71 | 71 1 12634 12802
72 | 72 0 12803 12952
73 | 73 0 12953 13089
74 | 74 0 13090 13225
75 | 75 0 13226 13470
76 | 76 0 13471 13722
77 | 77 0 13723 13843
78 | 78 0 13844 14010
79 | 79 0 14011 14196
80 | 80 0 14197 14308
81 | 81 0 14309 14394
82 | 82 0 14395 14516
83 | 83 0 14517 14747
84 | 84 0 14748 14852
85 | 85 1 14853 15044
86 | 86 0 15045 15181
87 | 87 1 15182 15267
88 | 88 1 15268 15418
89 | 89 0 15419 15503
90 | 90 1 15504 15654
91 | 91 1 15655 15808
92 | 92 1 15809 15867
93 | 93 1 15868 16029
94 | 94 0 16030 16194
95 | 95 0 16195 16393
96 | 96 0 16394 16547
97 | 97 0 16548 16711
98 | 98 1 16712 16819
99 | 99 1 16820 17044
100 | 100 0 17045 17265
101 | 101 0 17266 17402
102 | 102 0 17403 17494
103 | 103 1 17495 17660
104 | 104 0 17661 17760
105 | 105 0 17761 18144
106 | 106 0 18145 18289
107 | 107 0 18290 18388
108 | 108 0 18389 18463
109 | 109 0 18464 18605
110 | 110 0 18606 18771
111 | 111 0 18772 18830
112 | 112 0 18831 18937
113 | 113 0 18938 19097
114 | 114 0 19098 19133
115 | 115 0 19134 19362
116 | 116 0 19363 19464
117 | 117 0 19465 19573
118 | 118 0 19574 19910
119 | 119 0 19911 20044
120 | 120 0 20045 20124
121 | 121 0 20125 20260
122 | 122 0 20261 20326
123 | 123 0 20327 20444
124 | 124 0 20445 20532
125 | 125 0 20533 20681
126 | 126 0 20682 20872
127 | 127 0 20873 21046
128 | 128 0 21047 21205
129 | 129 0 21206 21393
130 | 130 0 21394 21449
131 | 131 0 21450 21623
132 | 132 0 21624 21729
133 | 133 0 21730 21780
134 | 134 0 21781 22028
135 | 135 0 22029 22164
136 | 136 0 22165 22309
137 | 137 0 22310 22395
138 | 138 0 22396 22460
139 | 139 0 22461 22641
140 | 140 0 22642 22774
141 | 141 0 22775 22896
142 | 142 0 22897 22993
143 | 143 0 22994 23076
144 | 144 0 23077 23175
145 | 145 0 23176 23350
146 | 146 0 23351 23474
147 | 147 0 23475 23538
148 | 148 0 23539 23667
149 | 149 0 23668 23672
150 | 150 0 23673 23718
151 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC5944822.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 DEVICE 133 191 213
2 | SPAN 2 DEVICE 133 215 220
3 | SPAN 3 DEVICE 135 0 22
4 | SPAN 4 DEVICE 135 23 28
5 | SPAN 5 VALUE 135 222 233
6 | SPAN 6 DEVICE 135 249 254
7 | SPAN 7 EXPERIMENT:general_info 136 32 39
8 | SPAN 8 DEVICE 136 78 83
9 | SPAN 9 DEVICE 137 163 168
10 | SPAN 10 DEVICE 138 146 151
11 | SPAN 11 DEVICE 138 211 216
12 | SPAN 12 EXPERIMENT:general_info 138 328 335
13 | SPAN 13 DEVICE 138 373 378
14 | SPAN 14 DEVICE 139 100 105
15 | SPAN 15 DEVICE 140 137 142
16 | SPAN 16 DEVICE 142 57 62
17 | SPAN 17 EXPERIMENT:general_info 142 63 71
18 | SPAN 18 MATERIAL 142 83 109
19 | SPAN 19 MATERIAL 142 113 137
20 | SPAN 20 MATERIAL 142 146 161
21 | SPAN 21 DEVICE 142 327 332
22 | SPAN 22 DEVICE 143 100 105
23 | SPAN 23 MATERIAL 144 133 168
24 | SPAN 24 DEVICE 144 209 214
25 | SPAN 25 DEVICE 144 371 375
26 | SPAN 26 EXPERIMENT:previous_work 144 376 386
27 | SPAN 27 MATERIAL 144 413 448
28 | SPAN 28 EXPERIMENT:previous_work 144 459 462
29 | SPAN 29 VALUE 144 463 476
30 | SPAN 30 VALUE 144 480 486
31 | SPAN 31 MATERIAL 144 508 543
32 | SPAN 32 EXPERIMENT:previous_work 144 562 568
33 | SPAN 33 VALUE 144 599 612
34 | SPAN 34 VALUE 144 616 622
35 | SPAN 35 MATERIAL 145 191 223
36 | SPAN 36 MATERIAL 145 225 232
37 | SPAN 37 MATERIAL 145 244 250
38 | SPAN 38 DEVICE 146 19 29
39 | SPAN 39 MATERIAL 146 47 54
40 | SPAN 40 MATERIAL 149 132 154
41 | SPAN 41 EXPERIMENT:previous_work 149 185 197
42 | SPAN 42 MATERIAL 149 214 234
43 | SPAN 43 MATERIAL 149 236 239
44 | SPAN 44 MATERIAL 149 241 256
45 | SPAN 45 DEVICE 149 257 265
46 | SPAN 46 EXPERIMENT:previous_work 149 281 285
47 | SPAN 47 MATERIAL 149 321 347
48 | SPAN 48 EXPERIMENT:previous_work 149 356 365
49 | SPAN 49 MATERIAL 149 434 437
50 | SPAN 50 MATERIAL 149 466 492
51 | SPAN 51 EXPERIMENT:previous_work 150 12 20
52 | SPAN 52 EXPERIMENT:previous_work 150 154 162
53 | SPAN 53 MATERIAL 150 169 185
54 | SPAN 54 MATERIAL 150 187 190
55 | SPAN 55 EXPERIMENT:general_info 153 112 124
56 | SPAN 56 DEVICE 153 128 133
57 | SPAN 57 EXPERIMENT:previous_work 153 300 308
58 | SPAN 58 MATERIAL 153 344 351
59 | SPAN 59 MATERIAL 153 353 381
60 | SPAN 60 MATERIAL 153 394 401
61 | EXPERIMENT 1 7
62 | device 8
63 | EXPERIMENT 2 12
64 | device 13
65 | EXPERIMENT 3 17
66 | device 16
67 | cathode_material 18
68 | cathode_material 19
69 | cathode_material 20
70 | EXPERIMENT 4 26
71 | device 25
72 | cathode_material 27
73 | EXPERIMENT 5 28
74 | resistance 29
75 | EXPERIMENT 6 32
76 | cathode_material 31
77 | resistance 33
78 | working_temperature 34
79 | EXPERIMENT 7 41
80 | cathode_material 40
81 | electrolyte_material 42
82 | support_material 44
83 | device 45
84 | EXPERIMENT 8 46
85 | cathode_material 47
86 | EXPERIMENT 9 48
87 | electrolyte_material 49
88 | cathode_material 50
89 | EXPERIMENT 10 52
90 | cathode_material 53
91 | EXPERIMENT 11 55
92 | device 56
93 | EXPERIMENT 12 57
94 | cathode_material 58
95 | cathode_material 59
96 | anode_material 60
97 | LINK experiment_variation 32 28
98 | LINK same_experiment 26 28
99 | LINK same_experiment 41 46
100 | LINK same_experiment 46 48
101 | LINK same_experiment 51 52
102 | LINK coreference 1 2
103 | LINK coreference 3 4
104 | LINK coreference 24 25
105 | LINK coreference 35 36
106 | LINK coreference 42 43
107 | LINK coreference 53 54
108 |
--------------------------------------------------------------------------------
/code/source/evaluation/ir_stats.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Heike Adel
4 | @author: Annemarie Friedrich
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU Affero General Public License as published
8 | by the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU Affero General Public License for more details.
15 | You should have received a copy of the GNU Affero General Public License
16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | """
18 |
19 | import numpy as np
20 |
21 | """
22 | Utilities for evaluating classification performance.
23 | """
24 |
25 |
def compute_eval_scores(conf_matrix, labels, verbose=True):
    """
    Compute accuracy and per-class / macro-averaged precision, recall and F1.

    All scores are percentages (0-100).

    :param conf_matrix: num_classes x num_classes confusion matrix, first dimension is the gold standard label,
        second dimension is the predicted label
    :param labels: list of labels, indices correspond to the indices in the confusion matrix
    :param verbose: if True (default), print the statistics to stdout
    :return: accuracy, macro p, r, f, dictionaries with p, r, f by class
    """
    num_classes = len(labels)
    conf = np.array(conf_matrix)

    p = {}  # precision by class
    r = {}  # recall by class
    f1 = {}  # F1 by class

    for i, cat in enumerate(labels):
        correct = conf[i, i]
        total_gold = conf[i].sum()
        total_pred = conf[:, i].sum()
        # Definition: a class that was never predicted gets perfect precision.
        # Scores are percentages, so "perfect" is 100 (not 1).
        if total_pred == 0:
            p[cat] = 100.0
        else:
            p[cat] = correct / total_pred * 100
        if total_gold == 0:
            r[cat] = 0.0
        else:
            r[cat] = correct / total_gold * 100
        # Definition: if P=0 and R=0 then F1 = 0
        if p[cat] == 0 and r[cat] == 0:
            f1[cat] = 0.0
        else:
            f1[cat] = 2 * p[cat] * r[cat] / (p[cat] + r[cat])

    # macro-averages (each class weighted equally)
    macro_p = sum(p.values()) / num_classes
    macro_r = sum(r.values()) / num_classes
    # macro-avg. F1 is the harmonic mean of macro-p and macro-r
    # (defined as 0 when both are 0, instead of dividing by zero)
    if macro_p + macro_r == 0:
        macro_f1 = 0.0
    else:
        macro_f1 = 2 * macro_p * macro_r / (macro_p + macro_r)

    # accuracy (defined as 0 for a confusion matrix without any counts)
    correct = sum(conf[i, i] for i in range(num_classes))
    total = sum(conf[i].sum() for i in range(num_classes))
    if total == 0:
        accuracy = 0.0
    else:
        accuracy = correct / total * 100

    if verbose:
        print("Overall accuracy: {:.1f}".format(accuracy))
        print("Macro-avg. precision: {:.1f}".format(macro_p))
        print("Macro-avg. recall: {:.1f}".format(macro_r))
        print("Macro-avg. F1: {:.1f}".format(macro_f1))
        print("Class statistics (P/R/F):")
        for cat in labels:
            print("{:20s} {:.1f} {:.1f} {:.1f}".format(str(cat), p[cat], r[cat], f1[cat]))

    # return all the results
    return accuracy, macro_p, macro_r, macro_f1, p, r, f1
86 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC4735809.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 MATERIAL 7 53 69
2 | SPAN 2 MATERIAL 7 71 74
3 | SPAN 3 MATERIAL 7 77 93
4 | SPAN 4 MATERIAL 7 95 98
5 | SPAN 5 MATERIAL 7 105 121
6 | SPAN 6 MATERIAL 7 123 126
7 | SPAN 7 EXPERIMENT:current_exp 7 160 172
8 | SPAN 8 MATERIAL 7 247 249
9 | SPAN 9 MATERIAL 7 278 280
10 | SPAN 10 EXPERIMENT:general_info 21 36 45
11 | SPAN 11 MATERIAL 21 49 60
12 | SPAN 12 MATERIAL 21 89 105
13 | SPAN 13 MATERIAL 21 107 110
14 | SPAN 14 MATERIAL 23 31 34
15 | SPAN 15 EXPERIMENT:previous_work 23 121 132
16 | SPAN 16 MATERIAL 24 120 136
17 | SPAN 17 MATERIAL 24 138 141
18 | SPAN 18 MATERIAL 24 144 160
19 | SPAN 19 MATERIAL 24 162 165
20 | SPAN 20 MATERIAL 24 172 188
21 | SPAN 21 MATERIAL 24 190 193
22 | SPAN 22 MATERIAL 25 0 3
23 | SPAN 23 MATERIAL 25 8 11
24 | SPAN 24 EXPERIMENT:current_exp 25 17 24
25 | SPAN 25 MATERIAL 25 63 66
26 | SPAN 26 EXPERIMENT:current_exp 25 76 88
27 | SPAN 27 MATERIAL 30 70 73
28 | SPAN 28 MATERIAL 30 103 106
29 | SPAN 29 MATERIAL 30 111 114
30 | SPAN 30 EXPERIMENT:current_exp 30 121 133
31 | SPAN 31 MATERIAL 42 114 117
32 | SPAN 32 VALUE 42 163 176
33 | SPAN 33 VALUE 42 180 186
34 | SPAN 34 MATERIAL 42 190 193
35 | SPAN 35 EXPERIMENT:current_exp 42 265 275
36 | SPAN 36 MATERIAL 58 202 205
37 | SPAN 37 MATERIAL 58 210 213
38 | SPAN 38 EXPERIMENT:current_exp 58 214 216
39 | SPAN 39 VALUE 58 229 240
40 | SPAN 40 VALUE 58 244 255
41 | SPAN 41 MATERIAL 59 0 3
42 | SPAN 42 EXPERIMENT:current_exp 59 21 23
43 | SPAN 43 VALUE 59 87 97
44 | SPAN 44 MATERIAL 62 36 39
45 | SPAN 45 MATERIAL 62 44 47
46 | SPAN 46 EXPERIMENT:current_exp 62 72 74
47 | SPAN 47 MATERIAL 71 197 200
48 | SPAN 48 EXPERIMENT:current_exp 71 223 231
49 | SPAN 49 DEVICE 71 245 252
50 | SPAN 50 EXPERIMENT:current_exp 71 284 289
51 | SPAN 51 VALUE 71 296 306
52 | SPAN 52 MATERIAL 71 322 324
53 | SPAN 53 VALUE 71 329 339
54 | SPAN 54 MATERIAL 71 356 358
55 | SPAN 55 VALUE 71 381 387
56 | SPAN 56 VALUE 72 96 109
57 | SPAN 57 MATERIAL 72 113 116
58 | SPAN 58 MATERIAL 72 121 124
59 | SPAN 59 VALUE 117 85 97
60 | SPAN 60 MATERIAL 117 107 110
61 | SPAN 61 VALUE 117 114 122
62 | EXPERIMENT 1 7
63 | cathode_material 1
64 | cathode_material 3
65 | cathode_material 5
66 | fuel_used 8
67 | fuel_used 9
68 | EXPERIMENT 2 10
69 | anode_material 11
70 | cathode_material 12
71 | EXPERIMENT 3 15
72 | anode_material 14
73 | EXPERIMENT 4 24
74 | anode_material 22
75 | anode_material 23
76 | EXPERIMENT 5 26
77 | cathode_material 25
78 | EXPERIMENT 6 30
79 | cathode_material 27
80 | anode_material 28
81 | anode_material 29
82 | EXPERIMENT 7 35
83 | cathode_material 31
84 | resistance 32
85 | working_temperature 33
86 | fuel_used 34
87 | EXPERIMENT 8 38
88 | anode_material 36
89 | anode_material 37
90 | conductivity 39
91 | working_temperature 40
92 | EXPERIMENT 9 42
93 | cathode_material 41
94 | conductivity 43
95 | EXPERIMENT 10 46
96 | cathode_material 44
97 | cathode_material 45
98 | EXPERIMENT 11 48
99 | anode_material 47
100 | device 49
101 | EXPERIMENT 12 50
102 | resistance 51
103 | fuel_used 52
104 | resistance 53
105 | fuel_used 54
106 | working_temperature 55
107 | LINK experiment_variation 26 24
108 | LINK same_experiment 48 50
109 | LINK coreference 1 2
110 | LINK coreference 3 4
111 | LINK coreference 5 6
112 | LINK coreference 12 13
113 | LINK coreference 16 17
114 | LINK coreference 18 19
115 | LINK coreference 20 21
116 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC4663492.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 EXPERIMENT:current_exp 4 50 53
2 | SPAN 2 VALUE 4 54 64
3 | SPAN 3 EXPERIMENT:current_exp 4 72 74
4 | SPAN 4 MATERIAL 4 110 117
5 | SPAN 5 VALUE 4 158 168
6 | SPAN 6 VALUE 4 173 179
7 | SPAN 7 MATERIAL 7 27 56
8 | SPAN 8 MATERIAL 7 58 64
9 | SPAN 9 EXPERIMENT:general_info 7 80 89
10 | SPAN 10 DEVICE 7 156 178
11 | SPAN 11 DEVICE 7 180 185
12 | SPAN 12 EXPERIMENT:general_info 7 225 227
13 | SPAN 13 MATERIAL 7 261 296
14 | SPAN 14 MATERIAL 7 298 304
15 | SPAN 15 VALUE 7 346 358
16 | SPAN 16 MATERIAL 8 33 36
17 | SPAN 17 MATERIAL 8 38 53
18 | SPAN 18 EXPERIMENT:previous_work 8 64 72
19 | SPAN 19 VALUE 8 93 97
20 | SPAN 20 VALUE 8 99 104
21 | SPAN 21 VALUE 8 110 122
22 | SPAN 22 VALUE 8 126 129
23 | SPAN 23 VALUE 8 131 134
24 | SPAN 24 VALUE 8 140 146
25 | SPAN 25 EXPERIMENT:current_exp 21 19 22
26 | SPAN 26 VALUE 21 65 75
27 | SPAN 27 MATERIAL 21 86 94
28 | SPAN 28 VALUE 21 118 123
29 | SPAN 29 MATERIAL 21 124 127
30 | SPAN 30 MATERIAL 41 29 44
31 | SPAN 31 DEVICE 41 73 102
32 | SPAN 32 EXPERIMENT:current_exp 41 117 125
33 | SPAN 33 VALUE 41 189 211
34 | SPAN 34 EXPERIMENT:current_exp 43 21 29
35 | SPAN 35 VALUE 43 38 44
36 | SPAN 36 EXPERIMENT:current_exp 45 28 32
37 | SPAN 37 VALUE 45 33 35
38 | SPAN 38 VALUE 45 37 39
39 | SPAN 39 VALUE 45 41 43
40 | SPAN 40 VALUE 45 45 48
41 | SPAN 41 VALUE 45 54 65
42 | SPAN 42 VALUE 45 69 72
43 | SPAN 43 VALUE 45 74 77
44 | SPAN 44 VALUE 45 79 82
45 | SPAN 45 VALUE 45 84 87
46 | SPAN 46 VALUE 45 93 99
47 | SPAN 47 VALUE 46 38 44
48 | SPAN 48 EXPERIMENT:current_exp 46 49 57
49 | SPAN 49 MATERIAL 46 73 80
50 | SPAN 50 MATERIAL 47 17 32
51 | SPAN 51 EXPERIMENT:current_exp 47 46 49
52 | SPAN 52 EXPERIMENT:current_exp 47 77 80
53 | SPAN 53 MATERIAL 47 105 121
54 | SPAN 54 MATERIAL 85 21 36
55 | SPAN 55 EXPERIMENT:current_exp 85 50 52
56 | SPAN 56 DEVICE 85 115 120
57 | EXPERIMENT 1 1
58 | power_density 2
59 | EXPERIMENT 2 3
60 | anode_material 4
61 | power_density 5
62 | working_temperature 6
63 | EXPERIMENT 3 9
64 | anode_material 7
65 | device 10
66 | EXPERIMENT 4 12
67 | anode_material 13
68 | working_temperature 15
69 | EXPERIMENT 5 18
70 | electrolyte_material 17
71 | conductivity 19
72 | conductivity 20
73 | conductivity 21
74 | working_temperature 22
75 | working_temperature 23
76 | working_temperature 24
77 | EXPERIMENT 6 25
78 | anode_material 27
79 | anode_material 29
80 | EXPERIMENT 7 32
81 | anode_material 30
82 | device 31
83 | working_temperature 33
84 | EXPERIMENT 8 34
85 | working_temperature 35
86 | EXPERIMENT 9 36
87 | power_density 37
88 | power_density 38
89 | power_density 39
90 | power_density 40
91 | power_density 41
92 | working_temperature 42
93 | working_temperature 43
94 | working_temperature 44
95 | working_temperature 45
96 | working_temperature 46
97 | EXPERIMENT 10 48
98 | working_temperature 47
99 | anode_material 49
100 | EXPERIMENT 11 51
101 | anode_material 50
102 | EXPERIMENT 12 52
103 | anode_material 53
104 | EXPERIMENT 13 55
105 | anode_material 54
106 | device 56
107 | LINK experiment_variation 3 1
108 | LINK experiment_variation 12 9
109 | LINK same_experiment 32 34
110 | LINK experiment_variation 48 36
111 | LINK same_experiment 34 36
112 | LINK experiment_variation 51 48
113 | LINK experiment_variation 52 51
114 | LINK coreference 7 8
115 | LINK coreference 10 11
116 | LINK coreference 13 14
117 | LINK coreference 17 16
118 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5793538.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1106
2 | 2 1 1107 1320
3 | 3 1 1321 1497
4 | 4 1 1498 1786
5 | 5 0 1787 1847
6 | 6 1 1848 2055
7 | 7 1 2056 2289
8 | 8 0 2290 2645
9 | 9 0 2646 2849
10 | 10 0 2850 3130
11 | 11 1 3131 3467
12 | 12 1 3468 3759
13 | 13 0 3760 3987
14 | 14 0 3988 4318
15 | 15 1 4319 4517
16 | 16 1 4518 4698
17 | 17 1 4699 4887
18 | 18 1 4888 5003
19 | 19 1 5004 5138
20 | 20 1 5139 5358
21 | 21 1 5359 5476
22 | 22 1 5477 5719
23 | 23 1 5720 6019
24 | 24 0 6020 6308
25 | 25 1 6309 6691
26 | 26 0 6692 6982
27 | 27 0 6983 7149
28 | 28 0 7150 7305
29 | 29 0 7306 7430
30 | 30 0 7431 7564
31 | 31 0 7565 7671
32 | 32 0 7672 7833
33 | 33 0 7834 8003
34 | 34 1 8004 8171
35 | 35 1 8172 8258
36 | 36 1 8259 8351
37 | 37 0 8352 8466
38 | 38 0 8467 8566
39 | 39 0 8567 8743
40 | 40 0 8744 8854
41 | 41 0 8855 8996
42 | 42 0 8997 9161
43 | 43 0 9162 9280
44 | 44 0 9281 9397
45 | 45 0 9398 9506
46 | 46 0 9507 9815
47 | 47 0 9816 9920
48 | 48 0 9921 10084
49 | 49 0 10085 10253
50 | 50 0 10254 10485
51 | 51 0 10486 10866
52 | 52 0 10867 11061
53 | 53 0 11062 11231
54 | 54 0 11232 11502
55 | 55 0 11503 11530
56 | 56 0 11531 11658
57 | 57 0 11659 11925
58 | 58 0 11926 12182
59 | 59 0 12183 12461
60 | 60 0 12462 12634
61 | 61 0 12635 12811
62 | 62 0 12812 12934
63 | 63 0 12935 13028
64 | 64 0 13029 13110
65 | 65 0 13111 13437
66 | 66 0 13438 13581
67 | 67 0 13582 13778
68 | 68 0 13779 14015
69 | 69 0 14016 14235
70 | 70 0 14236 14383
71 | 71 0 14384 14565
72 | 72 0 14566 14617
73 | 73 0 14618 14781
74 | 74 0 14782 14962
75 | 75 0 14963 15336
76 | 76 0 15337 15512
77 | 77 0 15513 15768
78 | 78 0 15769 15882
79 | 79 0 15883 16008
80 | 80 0 16009 16298
81 | 81 0 16299 16509
82 | 82 1 16510 16705
83 | 83 1 16706 16820
84 | 84 0 16821 16952
85 | 85 1 16953 17181
86 | 86 1 17182 17346
87 | 87 0 17347 17424
88 | 88 0 17425 17651
89 | 89 1 17652 17847
90 | 90 1 17848 18003
91 | 91 1 18004 18254
92 | 92 0 18255 18393
93 | 93 0 18394 18645
94 | 94 0 18646 18889
95 | 95 0 18890 19086
96 | 96 0 19087 19229
97 | 97 0 19230 19484
98 | 98 0 19485 19650
99 | 99 0 19651 19819
100 | 100 0 19820 20295
101 | 101 0 20296 20428
102 | 102 1 20429 20524
103 | 103 0 20525 20750
104 | 104 1 20751 20891
105 | 105 0 20892 21139
106 | 106 0 21140 21333
107 | 107 0 21334 21492
108 | 108 0 21493 21773
109 | 109 1 21774 21977
110 | 110 0 21978 22268
111 | 111 0 22269 22391
112 | 112 0 22392 22509
113 | 113 0 22510 22671
114 | 114 0 22672 22780
115 | 115 0 22781 22936
116 | 116 0 22937 23067
117 | 117 1 23068 23204
118 | 118 0 23205 23281
119 | 119 1 23282 23386
120 | 120 0 23387 23632
121 | 121 0 23633 23840
122 | 122 0 23841 24013
123 | 123 0 24014 24102
124 | 124 0 24103 24170
125 | 125 1 24171 24339
126 | 126 0 24340 24557
127 | 127 1 24558 24658
128 | 128 0 24659 24958
129 | 129 0 24959 25116
130 | 130 0 25117 25519
131 | 131 0 25520 25646
132 | 132 0 25647 25733
133 | 133 1 25734 25853
134 | 134 1 25854 25989
135 | 135 0 25990 26096
136 | 136 0 26097 26288
137 | 137 0 26289 26466
138 | 138 1 26467 26646
139 | 139 0 26647 26847
140 | 140 0 26848 26976
141 | 141 1 26977 27057
142 | 142 0 27058 27200
143 | 143 1 27201 27467
144 | 144 1 27468 27616
145 | 145 0 27617 27732
146 | 146 0 27733 28011
147 | 147 0 28012 28132
148 | 148 0 28133 28313
149 | 149 1 28314 28478
150 | 150 1 28479 28647
151 | 151 1 28648 28952
152 | 152 0 28953 29027
153 | 153 1 29028 29204
154 | 154 0 29205 29346
155 | 155 0 29347 29468
156 | 156 0 29469 29623
157 | 157 0 29624 29809
158 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4673446.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 906
2 | 2 0 907 1257
3 | 3 1 1258 1518
4 | 4 1 1519 1695
5 | 5 1 1696 1784
6 | 6 0 1785 1878
7 | 7 0 1879 2052
8 | 8 0 2053 2313
9 | 9 0 2314 2470
10 | 10 0 2471 2660
11 | 11 0 2661 2796
12 | 12 1 2797 2960
13 | 13 1 2961 3296
14 | 14 1 3297 3470
15 | 15 1 3471 3663
16 | 16 0 3664 3847
17 | 17 1 3848 4122
18 | 18 0 4123 4179
19 | 19 1 4180 4417
20 | 20 0 4418 4589
21 | 21 1 4590 4889
22 | 22 0 4890 5035
23 | 23 0 5036 5211
24 | 24 1 5212 5411
25 | 25 0 5412 5591
26 | 26 1 5592 5761
27 | 27 1 5762 5967
28 | 28 0 5968 7035
29 | 29 0 7036 7254
30 | 30 0 7255 7416
31 | 31 0 7417 7687
32 | 32 0 7688 7849
33 | 33 0 7850 8034
34 | 34 0 8035 8231
35 | 35 0 8232 8420
36 | 36 0 8421 8543
37 | 37 0 8544 8747
38 | 38 0 8748 8946
39 | 39 0 8947 9057
40 | 40 0 9058 9329
41 | 41 0 9330 9462
42 | 42 0 9463 9701
43 | 43 0 9702 9839
44 | 44 0 9840 10022
45 | 45 0 10023 10206
46 | 46 0 10207 10375
47 | 47 1 10376 10481
48 | 48 0 10482 10678
49 | 49 0 10679 10987
50 | 50 0 10988 11108
51 | 51 0 11109 11370
52 | 52 1 11371 11545
53 | 53 0 11546 11710
54 | 54 0 11711 11885
55 | 55 0 11886 12069
56 | 56 1 12070 12430
57 | 57 1 12431 12713
58 | 58 1 12714 12898
59 | 59 1 12899 13290
60 | 60 1 13291 13501
61 | 61 1 13502 13627
62 | 62 1 13628 13761
63 | 63 1 13762 13933
64 | 64 0 13934 14089
65 | 65 1 14090 14287
66 | 66 1 14288 14592
67 | 67 1 14593 14794
68 | 68 1 14795 15072
69 | 69 1 15073 15210
70 | 70 1 15211 15285
71 | 71 0 15286 15379
72 | 72 1 15380 15503
73 | 73 1 15504 15792
74 | 74 0 15793 15989
75 | 75 1 15990 16181
76 | 76 1 16182 16321
77 | 77 1 16322 16679
78 | 78 1 16680 16839
79 | 79 1 16840 17019
80 | 80 1 17020 17228
81 | 81 1 17229 17431
82 | 82 1 17432 17547
83 | 83 1 17548 17698
84 | 84 1 17699 17866
85 | 85 1 17867 18020
86 | 86 1 18021 18223
87 | 87 1 18224 18395
88 | 88 1 18396 18545
89 | 89 1 18546 18683
90 | 90 1 18684 18878
91 | 91 1 18879 19016
92 | 92 1 19017 19121
93 | 93 1 19122 19246
94 | 94 0 19247 19422
95 | 95 1 19423 19621
96 | 96 1 19622 19794
97 | 97 1 19795 19930
98 | 98 1 19931 20071
99 | 99 0 20072 20141
100 | 100 1 20142 20267
101 | 101 1 20268 20509
102 | 102 0 20510 20684
103 | 103 0 20685 21090
104 | 104 0 21091 21218
105 | 105 0 21219 21370
106 | 106 0 21371 21624
107 | 107 0 21625 21770
108 | 108 0 21771 21930
109 | 109 0 21931 22194
110 | 110 0 22195 22395
111 | 111 0 22396 22648
112 | 112 0 22649 22773
113 | 113 0 22774 22911
114 | 114 0 22912 23169
115 | 115 0 23170 23457
116 | 116 0 23458 23801
117 | 117 0 23802 23978
118 | 118 0 23979 24165
119 | 119 0 24166 24333
120 | 120 0 24334 24483
121 | 121 0 24484 24760
122 | 122 0 24761 25071
123 | 123 0 25072 25282
124 | 124 0 25283 25401
125 | 125 0 25402 25584
126 | 126 0 25585 25711
127 | 127 0 25712 25960
128 | 128 0 25961 26187
129 | 129 1 26188 26397
130 | 130 0 26398 26658
131 | 131 0 26659 26788
132 | 132 0 26789 27091
133 | 133 0 27092 27315
134 | 134 0 27316 27531
135 | 135 0 27532 27602
136 | 136 0 27603 27748
137 | 137 0 27749 27898
138 | 138 0 27899 28005
139 | 139 0 28006 28073
140 | 140 0 28074 28352
141 | 141 0 28353 28409
142 | 142 0 28410 28591
143 | 143 0 28592 28733
144 | 144 0 28734 28904
145 | 145 0 28905 29076
146 | 146 0 29077 29238
147 | 147 0 29239 29461
148 | 148 0 29462 29547
149 | 149 0 29548 29628
150 | 150 0 29629 29800
151 | 151 0 29801 29885
152 | 152 0 29886 30065
153 | 153 0 30066 30186
154 | 154 0 30187 30251
155 | 155 0 30252 30374
156 | 156 0 30375 30379
157 | 157 0 30380 30425
158 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5456869.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 257
2 | 2 1 258 751
3 | 3 0 752 880
4 | 4 1 881 1088
5 | 5 0 1089 1267
6 | 6 0 1268 1452
7 | 7 0 1453 1816
8 | 8 0 1817 2016
9 | 9 0 2017 2264
10 | 10 0 2265 2322
11 | 11 0 2323 2433
12 | 12 0 2434 2748
13 | 13 0 2749 2858
14 | 14 0 2859 3019
15 | 15 1 3020 3190
16 | 16 1 3191 3431
17 | 17 0 3432 3546
18 | 18 0 3547 3740
19 | 19 0 3741 3950
20 | 20 0 3951 4191
21 | 21 0 4192 4443
22 | 22 0 4444 4633
23 | 23 0 4634 4740
24 | 24 0 4741 4920
25 | 25 1 4921 5170
26 | 26 0 5171 5366
27 | 27 0 5367 5531
28 | 28 0 5532 5653
29 | 29 0 5654 5822
30 | 30 0 5823 5937
31 | 31 0 5938 6077
32 | 32 0 6078 6214
33 | 33 0 6215 6504
34 | 34 0 6505 6570
35 | 35 0 6571 6632
36 | 36 0 6633 6981
37 | 37 0 6982 7060
38 | 38 0 7061 7296
39 | 39 0 7297 7375
40 | 40 0 7376 7432
41 | 41 0 7433 7584
42 | 42 0 7585 7666
43 | 43 0 7667 7856
44 | 44 0 7857 7987
45 | 45 0 7988 8092
46 | 46 0 8093 8189
47 | 47 0 8190 8384
48 | 48 0 8385 8500
49 | 49 0 8501 8737
50 | 50 0 8738 8866
51 | 51 0 8867 8998
52 | 52 0 8999 9145
53 | 53 0 9146 9249
54 | 54 0 9250 9460
55 | 55 0 9461 9597
56 | 56 0 9598 9775
57 | 57 0 9776 9868
58 | 58 0 9869 9985
59 | 59 0 9986 10078
60 | 60 0 10079 10179
61 | 61 0 10180 10229
62 | 62 0 10230 10331
63 | 63 0 10332 10498
64 | 64 0 10499 10669
65 | 65 0 10670 10852
66 | 66 0 10853 10963
67 | 67 0 10964 11139
68 | 68 0 11140 11166
69 | 69 0 11167 11269
70 | 70 0 11270 11353
71 | 71 0 11354 11404
72 | 72 0 11405 11487
73 | 73 0 11488 11577
74 | 74 0 11578 11690
75 | 75 0 11691 11739
76 | 76 0 11740 11999
77 | 77 0 12000 12069
78 | 78 0 12070 12256
79 | 79 0 12257 12373
80 | 80 0 12374 12571
81 | 81 0 12572 12771
82 | 82 0 12772 12850
83 | 83 0 12851 13020
84 | 84 0 13021 13111
85 | 85 0 13112 13493
86 | 86 0 13494 13633
87 | 87 0 13634 13873
88 | 88 0 13874 14105
89 | 89 0 14106 14232
90 | 90 0 14233 14278
91 | 91 0 14279 14365
92 | 92 0 14366 14501
93 | 93 0 14502 14642
94 | 94 0 14643 14734
95 | 95 0 14735 14850
96 | 96 0 14851 14942
97 | 97 0 14943 15063
98 | 98 0 15064 15245
99 | 99 0 15246 15347
100 | 100 0 15348 15495
101 | 101 0 15496 15764
102 | 102 0 15765 15863
103 | 103 0 15864 15983
104 | 104 0 15984 16085
105 | 105 0 16086 16227
106 | 106 0 16228 16414
107 | 107 0 16415 16550
108 | 108 0 16551 16845
109 | 109 0 16846 17022
110 | 110 0 17023 17255
111 | 111 0 17256 17533
112 | 112 0 17534 17688
113 | 113 0 17689 17768
114 | 114 0 17769 17901
115 | 115 0 17902 18048
116 | 116 0 18049 18130
117 | 117 0 18131 18215
118 | 118 0 18216 18375
119 | 119 0 18376 18562
120 | 120 0 18563 18756
121 | 121 0 18757 18957
122 | 122 0 18958 19058
123 | 123 0 19059 19248
124 | 124 0 19249 19468
125 | 125 0 19469 19608
126 | 126 0 19609 19720
127 | 127 0 19721 19792
128 | 128 0 19793 19947
129 | 129 0 19948 20074
130 | 130 0 20075 20304
131 | 131 0 20305 20484
132 | 132 0 20485 20630
133 | 133 0 20631 20773
134 | 134 0 20774 20944
135 | 135 0 20945 21095
136 | 136 0 21096 21394
137 | 137 0 21395 21509
138 | 138 1 21510 21711
139 | 139 0 21712 21965
140 | 140 0 21966 22159
141 | 141 0 22160 22224
142 | 142 1 22225 22521
143 | 143 0 22522 22693
144 | 144 1 22694 22782
145 | 145 0 22783 22914
146 | 146 1 22915 23003
147 | 147 0 23004 23078
148 | 148 1 23079 23246
149 | 149 0 23247 23514
150 | 150 0 23515 23721
151 | 151 0 23722 24099
152 | 152 0 24100 24200
153 | 153 0 24201 24323
154 | 154 1 24324 24551
155 | 155 1 24552 24760
156 | 156 0 24761 24960
157 | 157 0 24961 25301
158 | 158 0 25302 25423
159 | 159 1 25424 25607
160 | 160 1 25608 25742
161 | 161 0 25743 26029
162 | 162 0 26030 26213
163 | 163 0 26214 26341
164 | 164 1 26342 26442
165 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6337513.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1645
2 | 2 1 1646 1816
3 | 3 1 1817 2173
4 | 4 1 2174 2479
5 | 5 1 2480 2604
6 | 6 1 2605 2844
7 | 7 0 2845 2933
8 | 8 0 2934 3070
9 | 9 0 3071 3334
10 | 10 0 3335 3530
11 | 11 0 3531 3616
12 | 12 0 3617 3863
13 | 13 0 3864 4053
14 | 14 0 4054 4210
15 | 15 0 4211 4297
16 | 16 0 4298 4469
17 | 17 0 4470 4732
18 | 18 0 4733 4902
19 | 19 0 4903 5155
20 | 20 1 5156 5263
21 | 21 0 5264 5538
22 | 22 0 5539 5769
23 | 23 0 5770 6005
24 | 24 0 6006 6212
25 | 25 0 6213 6434
26 | 26 0 6435 6461
27 | 27 0 6462 6852
28 | 28 0 6853 6990
29 | 29 0 6991 7211
30 | 30 0 7212 7333
31 | 31 0 7334 7590
32 | 32 0 7591 7712
33 | 33 0 7713 7871
34 | 34 0 7872 8125
35 | 35 0 8126 8298
36 | 36 0 8299 8510
37 | 37 0 8511 8683
38 | 38 0 8684 9003
39 | 39 0 9004 9232
40 | 40 0 9233 9398
41 | 41 0 9399 9511
42 | 42 0 9512 9664
43 | 43 0 9665 9950
44 | 44 0 9951 9978
45 | 45 0 9979 10127
46 | 46 0 10128 10532
47 | 47 0 10533 10694
48 | 48 0 10695 10943
49 | 49 0 10944 11286
50 | 50 0 11287 11458
51 | 51 0 11459 11570
52 | 52 0 11571 11729
53 | 53 0 11730 11890
54 | 54 0 11891 12111
55 | 55 0 12112 12354
56 | 56 0 12355 12469
57 | 57 0 12470 12649
58 | 58 0 12650 12858
59 | 59 0 12859 13167
60 | 60 0 13168 13336
61 | 61 0 13337 13656
62 | 62 0 13657 13820
63 | 63 0 13821 13963
64 | 64 0 13964 14071
65 | 65 1 14072 14223
66 | 66 1 14224 14354
67 | 67 0 14355 14586
68 | 68 0 14587 14733
69 | 69 1 14734 14986
70 | 70 0 14987 15166
71 | 71 0 15167 15237
72 | 72 0 15238 15379
73 | 73 0 15380 15483
74 | 74 0 15484 15761
75 | 75 0 15762 15935
76 | 76 0 15936 16051
77 | 77 1 16052 16192
78 | 78 1 16193 16468
79 | 79 0 16469 16670
80 | 80 0 16671 16853
81 | 81 0 16854 16988
82 | 82 0 16989 17110
83 | 83 0 17111 17270
84 | 84 0 17271 17479
85 | 85 0 17480 17735
86 | 86 0 17736 17894
87 | 87 0 17895 18133
88 | 88 1 18134 18391
89 | 89 0 18392 18587
90 | 90 1 18588 18792
91 | 91 0 18793 18923
92 | 92 0 18924 19103
93 | 93 0 19104 19265
94 | 94 0 19266 19640
95 | 95 0 19641 19748
96 | 96 0 19749 19973
97 | 97 0 19974 20268
98 | 98 0 20269 20551
99 | 99 0 20552 20612
100 | 100 0 20613 20713
101 | 101 0 20714 20966
102 | 102 0 20967 21218
103 | 103 0 21219 21467
104 | 104 0 21468 21602
105 | 105 0 21603 21859
106 | 106 0 21860 22023
107 | 107 0 22024 22160
108 | 108 0 22161 22314
109 | 109 0 22315 22500
110 | 110 0 22501 22823
111 | 111 0 22824 23008
112 | 112 0 23009 23682
113 | 113 0 23683 23885
114 | 114 0 23886 24102
115 | 115 0 24103 24241
116 | 116 0 24242 24378
117 | 117 0 24379 24534
118 | 118 0 24535 24669
119 | 119 1 24670 24949
120 | 120 0 24950 25030
121 | 121 0 25031 25168
122 | 122 0 25169 25367
123 | 123 0 25368 25535
124 | 124 0 25536 25671
125 | 125 0 25672 26072
126 | 126 0 26073 26143
127 | 127 0 26144 26224
128 | 128 0 26225 26404
129 | 129 0 26405 26646
130 | 130 0 26647 26884
131 | 131 0 26885 27038
132 | 132 1 27039 27189
133 | 133 0 27190 27384
134 | 134 0 27385 27588
135 | 135 0 27589 27702
136 | 136 1 27703 27886
137 | 137 0 27887 28075
138 | 138 0 28076 28197
139 | 139 0 28198 28517
140 | 140 0 28518 28755
141 | 141 0 28756 28938
142 | 142 0 28939 29162
143 | 143 1 29163 29353
144 | 144 1 29354 29551
145 | 145 1 29552 29607
146 | 146 1 29608 29813
147 | 147 0 29814 29967
148 | 148 0 29968 30192
149 | 149 0 30193 30415
150 | 150 0 30416 30575
151 | 151 0 30576 30773
152 | 152 0 30774 31053
153 | 153 0 31054 31369
154 | 154 1 31370 31552
155 | 155 0 31553 31823
156 | 156 0 31824 32361
157 | 157 1 32362 32552
158 | 158 0 32553 32850
159 | 159 1 32851 33112
160 | 160 0 33113 33229
161 | 161 1 33230 33448
162 | 162 0 33449 33642
163 | 163 0 33643 33839
164 | 164 0 33840 34055
165 | 165 0 34056 34262
166 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5216129.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1271
2 | 2 1 1272 1408
3 | 3 1 1409 1653
4 | 4 0 1654 1844
5 | 5 0 1845 1961
6 | 6 0 1962 2065
7 | 7 1 2066 2263
8 | 8 0 2264 2526
9 | 9 0 2527 2735
10 | 10 0 2736 2878
11 | 11 0 2879 3155
12 | 12 1 3156 3284
13 | 13 1 3285 3601
14 | 14 1 3602 3849
15 | 15 1 3850 4034
16 | 16 1 4035 4402
17 | 17 0 4403 4690
18 | 18 1 4691 4924
19 | 19 0 4925 5085
20 | 20 1 5086 5354
21 | 21 1 5355 5521
22 | 22 1 5522 5639
23 | 23 1 5640 5853
24 | 24 0 5854 6139
25 | 25 0 6140 6279
26 | 26 0 6280 6494
27 | 27 0 6495 6632
28 | 28 0 6633 6792
29 | 29 0 6793 6963
30 | 30 0 6964 7104
31 | 31 1 7105 7299
32 | 32 0 7300 7520
33 | 33 0 7521 7861
34 | 34 1 7862 8147
35 | 35 1 8148 8287
36 | 36 0 8288 8522
37 | 37 1 8523 8718
38 | 38 1 8719 8804
39 | 39 1 8805 9059
40 | 40 1 9060 9460
41 | 41 0 9461 9752
42 | 42 1 9753 9940
43 | 43 0 9941 10029
44 | 44 1 10030 10282
45 | 45 1 10283 10421
46 | 46 0 10422 10656
47 | 47 1 10657 10915
48 | 48 1 10916 11105
49 | 49 1 11106 11280
50 | 50 1 11281 11616
51 | 51 0 11617 11787
52 | 52 0 11788 12148
53 | 53 1 12149 12418
54 | 54 0 12419 12546
55 | 55 0 12547 12843
56 | 56 0 12844 12933
57 | 57 0 12934 13038
58 | 58 0 13039 13311
59 | 59 0 13312 13436
60 | 60 0 13437 13771
61 | 61 1 13772 13951
62 | 62 0 13952 14083
63 | 63 0 14084 14276
64 | 64 0 14277 14743
65 | 65 0 14744 14873
66 | 66 0 14874 15119
67 | 67 0 15120 15319
68 | 68 1 15320 15511
69 | 69 0 15512 15652
70 | 70 0 15653 15781
71 | 71 0 15782 15901
72 | 72 0 15902 15986
73 | 73 0 15987 16112
74 | 74 0 16113 16395
75 | 75 0 16396 16559
76 | 76 0 16560 16740
77 | 77 0 16741 16904
78 | 78 0 16905 17099
79 | 79 0 17100 17616
80 | 80 0 17617 17738
81 | 81 0 17739 17809
82 | 82 0 17810 18020
83 | 83 0 18021 18246
84 | 84 0 18247 18415
85 | 85 0 18416 18612
86 | 86 0 18613 19042
87 | 87 0 19043 19213
88 | 88 0 19214 19439
89 | 89 0 19440 19582
90 | 90 0 19583 19951
91 | 91 0 19952 20193
92 | 92 1 20194 20328
93 | 93 1 20329 20436
94 | 94 1 20437 20558
95 | 95 0 20559 20795
96 | 96 1 20796 21018
97 | 97 0 21019 21150
98 | 98 0 21151 21256
99 | 99 1 21257 21579
100 | 100 1 21580 21691
101 | 101 1 21692 21915
102 | 102 0 21916 22210
103 | 103 0 22211 22518
104 | 104 0 22519 22851
105 | 105 0 22852 22949
106 | 106 0 22950 23142
107 | 107 0 23143 23263
108 | 108 0 23264 23550
109 | 109 0 23551 23680
110 | 110 0 23681 23820
111 | 111 0 23821 23976
112 | 112 0 23977 24141
113 | 113 0 24142 24296
114 | 114 0 24297 24453
115 | 115 0 24454 24572
116 | 116 0 24573 24649
117 | 117 0 24650 24758
118 | 118 0 24759 24916
119 | 119 0 24917 24969
120 | 120 0 24970 25052
121 | 121 0 25053 25131
122 | 122 0 25132 25246
123 | 123 0 25247 25725
124 | 124 0 25726 25856
125 | 125 0 25857 26032
126 | 126 0 26033 26279
127 | 127 0 26280 26388
128 | 128 0 26389 26480
129 | 129 0 26481 26579
130 | 130 0 26580 26716
131 | 131 0 26717 26822
132 | 132 0 26823 26976
133 | 133 0 26977 27212
134 | 134 0 27213 27279
135 | 135 0 27280 27405
136 | 136 0 27406 27584
137 | 137 0 27585 27667
138 | 138 0 27668 27782
139 | 139 0 27783 27875
140 | 140 0 27876 27959
141 | 141 0 27960 28122
142 | 142 0 28123 28214
143 | 143 0 28215 28359
144 | 144 0 28360 28568
145 | 145 0 28569 28673
146 | 146 0 28674 28738
147 | 147 0 28739 28825
148 | 148 0 28826 28934
149 | 149 0 28935 29120
150 | 150 0 29121 29309
151 | 151 0 29310 29468
152 | 152 0 29469 29632
153 | 153 0 29633 29858
154 | 154 0 29859 30123
155 | 155 0 30124 30213
156 | 156 0 30214 30293
157 | 157 0 30294 30356
158 | 158 0 30357 30624
159 | 159 0 30625 30872
160 | 160 0 30873 30974
161 | 161 0 30975 31015
162 | 162 0 31016 31152
163 | 163 0 31153 31215
164 | 164 0 31216 31317
165 | 165 0 31318 31322
166 | 166 0 31323 31372
167 | 167 0 31373 31509
168 |
--------------------------------------------------------------------------------
/code/source/dataHandling/sofc_exp_utils.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Annemarie Friedrich
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU Affero General Public License as published
7 | by the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU Affero General Public License for more details.
14 | You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | """
17 |
18 |
19 | from collections import defaultdict
20 | import csv
21 |
22 |
def modify_cross_val_data_split(doc_info, num_folds, fold):
    """
    Re-assign the train/dev documents of the corpus to the split of one cross validation fold.

    The IDs of all documents currently marked "train" or "dev" are sorted; every num_folds-th document,
    starting at position fold - 1, becomes "dev" and the remaining ones become "train". Documents with
    any other split value are left untouched. doc_info is modified in place.

    :param doc_info: corpus metadata dictionary (as created by get_sofc_corpus_metadata)
    :param num_folds: total number of cross validation folds
    :param fold: the fold for which to return the train/dev split, folds are indexed by 1, 2, ...
    :return: the modified doc_info object
    :raises ValueError: if fold is not within 1..num_folds
    """
    # An out-of-range fold would silently select a wrong (or wrapped-around) set of dev documents.
    if not 1 <= fold <= num_folds:
        raise ValueError("fold must be between 1 and num_folds ({}), got {}".format(num_folds, fold))
    # sort training document IDs alphabetically so that the folds are deterministic
    train_ids = sorted(docid for docid in doc_info if doc_info[docid]["datasplit"] in {"train", "dev"})
    # set all documents to train
    for docid in train_ids:
        doc_info[docid]["datasplit"] = "train"
    # pick the fold's dev documents (function is called with 1 = first fold etc.)
    for docid in train_ids[fold - 1::num_folds]:
        doc_info[docid]["datasplit"] = "dev"
    return doc_info
41 |
42 |
def get_sofc_corpus_metadata(meta_csv_file):
    """
    Read list of all documents with licensing information and predefined data splits.

    The file is parsed as tab-separated values. Its first row is a header that must contain the columns
    "name", "license" and "set". Completely empty lines are skipped.

    :param meta_csv_file: CSV file with data split information.
    :return: a dictionary (defaultdict of dicts) mapping each document ID to a dict with the keys
             "license" and "datasplit"
    :raises ValueError: if one of the required columns is missing from the header
    """
    doc_info = defaultdict(dict)
    # newline='' lets the csv module do its own newline handling, as its documentation recommends
    with open(meta_csv_file, encoding='utf-8', newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter='\t')
        header = next(csvreader)
        # resolve the column positions once instead of searching the header for every row
        name_col = header.index("name")
        license_col = header.index("license")
        split_col = header.index("set")
        for row in csvreader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. trailing newlines)
                continue
            docid = row[name_col]
            doc_info[docid]["license"] = row[license_col]
            doc_info[docid]["datasplit"] = row[split_col]
    return doc_info
58 |
59 |
def get_data_split_docids(meta_csv_file, num_cross_val_folds=None, current_cross_val_fold=None):
    """
    Collect the document IDs of the train, dev and test portions for one experiment.

    :param meta_csv_file: CSV file with data split information
    :param num_cross_val_folds: if using cross validation, specify total number of splits
    :param current_cross_val_fold: the current fold (only used when num_cross_val_folds is given)
    :return: returns the train, dev and test ids to use in this experiment.
    """
    # start from the data split as defined in the metadata file
    doc_info = get_sofc_corpus_metadata(meta_csv_file)
    use_cross_validation = num_cross_val_folds is not None
    if use_cross_validation:
        print("document info:", len(doc_info))
        # re-assigns train/dev in place for the requested fold
        modify_cross_val_data_split(doc_info, num_cross_val_folds, current_cross_val_fold)
    # one bucket per known split; documents with any other split value are ignored
    ids_by_split = {"train": [], "dev": [], "test": []}
    for docid, info in doc_info.items():
        bucket = ids_by_split.get(info["datasplit"])
        if bucket is not None:
            bucket.append(docid)
    return ids_by_split["train"], ids_by_split["dev"], ids_by_split["test"]
81 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4986314.csv:
--------------------------------------------------------------------------------
1 | 1 1 0 885
2 | 2 1 886 1048
3 | 3 0 1049 1271
4 | 4 0 1272 1520
5 | 5 0 1521 1643
6 | 6 0 1644 1959
7 | 7 0 1960 2306
8 | 8 0 2307 2582
9 | 9 0 2583 2698
10 | 10 0 2699 2944
11 | 11 0 2945 3058
12 | 12 0 3059 3219
13 | 13 0 3220 3342
14 | 14 0 3343 3352
15 | 15 0 3353 3533
16 | 16 0 3534 3721
17 | 17 0 3722 3910
18 | 18 0 3911 4126
19 | 19 0 4127 4295
20 | 20 0 4296 4457
21 | 21 0 4458 4700
22 | 22 1 4701 4861
23 | 23 0 4862 5020
24 | 24 0 5021 5098
25 | 25 0 5099 5279
26 | 26 0 5280 5400
27 | 27 0 5401 5553
28 | 28 0 5554 5741
29 | 29 0 5742 5950
30 | 30 0 5951 6105
31 | 31 0 6106 6297
32 | 32 0 6298 6399
33 | 33 0 6400 6572
34 | 34 0 6573 6748
35 | 35 0 6749 7031
36 | 36 0 7032 7118
37 | 37 0 7119 7321
38 | 38 0 7322 7580
39 | 39 0 7581 7778
40 | 40 0 7779 7885
41 | 41 0 7886 8143
42 | 42 0 8144 8438
43 | 43 0 8439 8582
44 | 44 0 8583 8660
45 | 45 0 8661 8793
46 | 46 0 8794 9041
47 | 47 0 9042 9149
48 | 48 0 9150 9225
49 | 49 0 9226 9330
50 | 50 0 9331 9458
51 | 51 0 9459 9667
52 | 52 0 9668 9804
53 | 53 0 9805 10111
54 | 54 0 10112 10271
55 | 55 0 10272 10456
56 | 56 0 10457 10545
57 | 57 0 10546 10679
58 | 58 0 10680 10857
59 | 59 0 10858 11023
60 | 60 0 11024 11069
61 | 61 0 11070 11226
62 | 62 0 11227 11347
63 | 63 0 11348 11564
64 | 64 0 11565 11714
65 | 65 0 11715 11796
66 | 66 0 11797 11966
67 | 67 0 11967 12107
68 | 68 0 12108 12175
69 | 69 0 12176 12277
70 | 70 0 12278 12418
71 | 71 0 12419 12736
72 | 72 0 12737 12836
73 | 73 0 12837 13027
74 | 74 0 13028 13151
75 | 75 0 13152 13209
76 | 76 0 13210 13419
77 | 77 0 13420 13735
78 | 78 0 13736 13992
79 | 79 0 13993 14241
80 | 80 0 14242 14338
81 | 81 0 14339 14505
82 | 82 0 14506 14637
83 | 83 0 14638 14829
84 | 84 0 14830 14968
85 | 85 0 14969 15118
86 | 86 0 15119 15274
87 | 87 0 15275 15583
88 | 88 0 15584 15596
89 | 89 0 15597 15709
90 | 90 0 15710 15870
91 | 91 0 15871 15971
92 | 92 0 15972 16226
93 | 93 0 16227 16484
94 | 94 0 16485 16667
95 | 95 0 16668 16677
96 | 96 0 16678 16788
97 | 97 0 16789 16941
98 | 98 0 16942 17095
99 | 99 0 17096 17288
100 | 100 0 17289 17459
101 | 101 0 17460 17596
102 | 102 0 17597 17738
103 | 103 0 17739 17903
104 | 104 0 17904 18200
105 | 105 0 18201 18299
106 | 106 0 18300 18368
107 | 107 0 18369 18535
108 | 108 0 18536 18617
109 | 109 0 18618 18702
110 | 110 0 18703 18792
111 | 111 0 18793 18928
112 | 112 0 18929 18975
113 | 113 0 18976 19114
114 | 114 0 19115 19123
115 | 115 0 19124 19297
116 | 116 0 19298 19427
117 | 117 0 19428 19900
118 | 118 0 19901 20060
119 | 119 0 20061 20160
120 | 120 0 20161 20411
121 | 121 0 20412 20662
122 | 122 0 20663 20753
123 | 123 0 20754 20864
124 | 124 0 20865 21043
125 | 125 1 21044 21209
126 | 126 0 21210 21564
127 | 127 0 21565 21703
128 | 128 0 21704 21805
129 | 129 0 21806 21913
130 | 130 0 21914 22049
131 | 131 0 22050 22201
132 | 132 0 22202 22398
133 | 133 0 22399 22581
134 | 134 0 22582 22832
135 | 135 0 22833 23031
136 | 136 0 23032 23250
137 | 137 0 23251 23306
138 | 138 0 23307 23494
139 | 139 0 23495 23608
140 | 140 0 23609 23679
141 | 141 0 23680 23777
142 | 142 0 23778 24162
143 | 143 0 24163 24438
144 | 144 0 24439 24704
145 | 145 0 24705 24871
146 | 146 0 24872 25005
147 | 147 0 25006 25114
148 | 148 0 25115 25214
149 | 149 0 25215 25429
150 | 150 0 25430 25551
151 | 151 0 25552 25734
152 | 152 0 25735 26026
153 | 153 0 26027 26036
154 | 154 0 26037 26134
155 | 155 0 26135 26237
156 | 156 0 26238 26488
157 | 157 0 26489 26571
158 | 158 0 26572 26761
159 | 159 0 26762 26867
160 | 160 0 26868 27042
161 | 161 0 27043 27217
162 | 162 1 27218 27411
163 | 163 0 27412 27512
164 | 164 0 27513 27610
165 | 165 0 27611 27799
166 | 166 0 27800 27901
167 | 167 0 27902 28104
168 | 168 0 28105 28333
169 | 169 0 28334 28448
170 | 170 0 28449 28751
171 | 171 0 28752 28970
172 | 172 0 28971 29116
173 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5457246.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 312
2 | 2 0 313 571
3 | 3 0 572 757
4 | 4 1 758 1001
5 | 5 0 1002 1105
6 | 6 1 1106 1288
7 | 7 1 1289 1449
8 | 8 1 1450 1618
9 | 9 1 1619 1804
10 | 10 0 1805 2051
11 | 11 1 2052 2202
12 | 12 1 2203 2608
13 | 13 0 2609 2863
14 | 14 0 2864 3236
15 | 15 0 3237 3440
16 | 16 0 3441 3671
17 | 17 0 3672 3819
18 | 18 0 3820 4029
19 | 19 0 4030 4285
20 | 20 0 4286 4585
21 | 21 0 4586 4733
22 | 22 1 4734 5015
23 | 23 0 5016 5236
24 | 24 0 5237 5520
25 | 25 0 5521 5577
26 | 26 1 5578 5767
27 | 27 1 5768 5921
28 | 28 0 5922 6012
29 | 29 1 6013 6358
30 | 30 0 6359 6449
31 | 31 0 6450 6785
32 | 32 0 6786 7021
33 | 33 0 7022 7144
34 | 34 0 7145 7299
35 | 35 1 7300 7917
36 | 36 0 7918 7999
37 | 37 0 8000 8145
38 | 38 0 8146 8335
39 | 39 0 8336 8540
40 | 40 0 8541 8652
41 | 41 0 8653 8845
42 | 42 0 8846 8948
43 | 43 0 8949 9059
44 | 44 0 9060 9182
45 | 45 0 9183 9443
46 | 46 0 9444 9586
47 | 47 0 9587 9722
48 | 48 0 9723 9963
49 | 49 0 9964 10095
50 | 50 0 10096 10325
51 | 51 0 10326 10539
52 | 52 0 10540 10744
53 | 53 0 10745 10914
54 | 54 0 10915 11096
55 | 55 0 11097 11308
56 | 56 0 11309 11468
57 | 57 0 11469 11620
58 | 58 0 11621 11802
59 | 59 0 11803 11918
60 | 60 0 11919 12119
61 | 61 0 12120 12420
62 | 62 0 12421 12716
63 | 63 0 12717 12954
64 | 64 1 12955 13216
65 | 65 0 13217 13297
66 | 66 0 13298 13475
67 | 67 1 13476 13582
68 | 68 0 13583 13817
69 | 69 1 13818 13915
70 | 70 0 13916 14083
71 | 71 1 14084 14170
72 | 72 0 14171 14434
73 | 73 0 14435 14549
74 | 74 1 14550 14639
75 | 75 1 14640 14765
76 | 76 1 14766 14933
77 | 77 0 14934 15104
78 | 78 0 15105 15205
79 | 79 0 15206 15318
80 | 80 0 15319 15484
81 | 81 1 15485 15564
82 | 82 1 15565 15676
83 | 83 1 15677 15821
84 | 84 1 15822 15910
85 | 85 1 15911 16123
86 | 86 0 16124 16250
87 | 87 1 16251 16435
88 | 88 1 16436 16498
89 | 89 1 16499 16603
90 | 90 0 16604 16769
91 | 91 0 16770 16840
92 | 92 0 16841 16954
93 | 93 0 16955 17090
94 | 94 0 17091 17159
95 | 95 1 17160 17330
96 | 96 1 17331 17427
97 | 97 1 17428 17680
98 | 98 0 17681 17830
99 | 99 0 17831 17941
100 | 100 0 17942 18108
101 | 101 0 18109 18318
102 | 102 0 18319 18495
103 | 103 1 18496 18587
104 | 104 1 18588 18697
105 | 105 0 18698 18937
106 | 106 0 18938 19097
107 | 107 0 19098 19261
108 | 108 0 19262 19395
109 | 109 0 19396 19548
110 | 110 1 19549 19742
111 | 111 1 19743 19828
112 | 112 0 19829 19924
113 | 113 0 19925 20167
114 | 114 0 20168 20414
115 | 115 0 20415 20573
116 | 116 1 20574 20692
117 | 117 0 20693 21044
118 | 118 0 21045 21562
119 | 119 0 21563 21945
120 | 120 0 21946 22102
121 | 121 0 22103 22302
122 | 122 0 22303 22462
123 | 123 0 22463 22546
124 | 124 0 22547 22734
125 | 125 0 22735 22818
126 | 126 0 22819 22862
127 | 127 0 22863 22985
128 | 128 0 22986 23130
129 | 129 0 23131 23259
130 | 130 0 23260 23297
131 | 131 0 23298 23365
132 | 132 0 23366 23495
133 | 133 0 23496 23665
134 | 134 0 23666 23823
135 | 135 0 23824 23930
136 | 136 0 23931 24039
137 | 137 0 24040 24261
138 | 138 0 24262 24370
139 | 139 0 24371 24622
140 | 140 0 24623 24833
141 | 141 0 24834 24924
142 | 142 0 24925 25150
143 | 143 0 25151 25349
144 | 144 0 25350 25507
145 | 145 0 25508 25759
146 | 146 0 25760 25890
147 | 147 0 25891 26072
148 | 148 0 26073 26117
149 | 149 0 26118 26189
150 | 150 0 26190 26283
151 | 151 0 26284 26560
152 | 152 0 26561 26659
153 | 153 0 26660 26864
154 | 154 0 26865 27047
155 | 155 0 27048 27171
156 | 156 0 27172 27255
157 | 157 0 27256 27421
158 | 158 0 27422 27588
159 | 159 0 27589 27704
160 | 160 0 27705 27923
161 | 161 0 27924 28082
162 | 162 0 28083 28214
163 | 163 0 28215 28351
164 | 164 0 28352 28495
165 | 165 0 28496 28604
166 | 166 0 28605 28707
167 | 167 0 28708 28846
168 | 168 0 28847 29051
169 | 169 1 29052 29259
170 | 170 0 29260 29444
171 | 171 0 29445 29597
172 | 172 0 29598 29732
173 | 173 0 29733 29844
174 | 174 0 29845 29920
175 | 175 0 29921 30038
176 | 176 0 30039 30187
177 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC6523084.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 MATERIAL 6 0 20
2 | SPAN 2 EXPERIMENT:current_exp 6 35 43
3 | SPAN 3 MATERIAL 6 113 127
4 | SPAN 4 MATERIAL 6 129 132
5 | SPAN 5 MATERIAL 6 169 185
6 | SPAN 6 MATERIAL 6 187 190
7 | SPAN 7 MATERIAL 21 20 34
8 | SPAN 8 MATERIAL 21 76 92
9 | SPAN 9 MATERIAL 21 149 173
10 | SPAN 10 MATERIAL 21 175 179
11 | SPAN 11 MATERIAL 35 129 145
12 | SPAN 12 MATERIAL 35 147 150
13 | SPAN 13 MATERIAL 35 155 159
14 | SPAN 14 MATERIAL 45 95 110
15 | SPAN 15 MATERIAL 45 130 141
16 | SPAN 16 EXPERIMENT:general_info 45 154 158
17 | SPAN 17 DEVICE 45 187 191
18 | SPAN 18 MATERIAL 46 0 4
19 | SPAN 19 EXPERIMENT:general_info 46 33 37
20 | SPAN 20 DEVICE 46 54 58
21 | SPAN 21 MATERIAL 47 11 26
22 | SPAN 22 EXPERIMENT:general_info 47 41 46
23 | SPAN 23 VALUE 47 99 114
24 | SPAN 24 MATERIAL 57 42 56
25 | SPAN 25 MATERIAL 57 58 61
26 | SPAN 26 MATERIAL 58 65 81
27 | SPAN 27 MATERIAL 58 83 86
28 | SPAN 28 MATERIAL 58 89 105
29 | SPAN 29 MATERIAL 58 107 110
30 | SPAN 30 MATERIAL 58 113 129
31 | SPAN 31 MATERIAL 58 131 134
32 | SPAN 32 MATERIAL 58 141 165
33 | SPAN 33 MATERIAL 58 167 171
34 | SPAN 34 MATERIAL 63 105 117
35 | SPAN 35 MATERIAL 63 119 122
36 | SPAN 36 EXPERIMENT:current_exp 120 23 32
37 | SPAN 37 MATERIAL 120 36 40
38 | SPAN 38 MATERIAL 120 41 44
39 | SPAN 39 MATERIAL 120 45 49
40 | SPAN 40 DEVICE 120 62 67
41 | SPAN 41 EXPERIMENT:current_exp 163 15 19
42 | SPAN 42 VALUE 163 23 29
43 | SPAN 43 MATERIAL 163 33 36
44 | SPAN 44 VALUE 163 64 68
45 | SPAN 45 EXPERIMENT:current_exp 183 39 47
46 | SPAN 46 VALUE 183 51 57
47 | SPAN 47 DEVICE 183 99 104
48 | SPAN 48 DEVICE 184 45 50
49 | SPAN 49 EXPERIMENT:current_exp 184 51 55
50 | SPAN 50 VALUE 184 77 83
51 | SPAN 51 DEVICE 185 36 41
52 | SPAN 52 EXPERIMENT:current_exp 185 42 49
53 | SPAN 53 VALUE 185 89 100
54 | SPAN 54 MATERIAL 185 105 108
55 | SPAN 55 DEVICE 185 121 125
56 | SPAN 56 VALUE 185 142 153
57 | SPAN 57 MATERIAL 185 158 161
58 | SPAN 58 VALUE 185 208 219
59 | SPAN 59 EXPERIMENT:current_exp 186 57 66
60 | SPAN 60 VALUE 186 90 96
61 | SPAN 61 MATERIAL 186 98 101
62 | SPAN 62 VALUE 186 119 125
63 | SPAN 63 MATERIAL 186 127 130
64 | SPAN 64 VALUE 186 180 186
65 | SPAN 65 EXPERIMENT:current_exp 191 35 44
66 | SPAN 66 VALUE 191 48 58
67 | SPAN 67 DEVICE 191 77 81
68 | SPAN 68 VALUE 191 83 93
69 | SPAN 69 MATERIAL 191 112 120
70 | SPAN 70 DEVICE 191 121 125
71 | SPAN 71 VALUE 191 131 141
72 | SPAN 72 MATERIAL 191 159 167
73 | SPAN 73 DEVICE 191 168 172
74 | EXPERIMENT 1 2
75 | cathode_material 1
76 | cathode_material 3
77 | cathode_material 5
78 | EXPERIMENT 2 16
79 | cathode_material 14
80 | cathode_material 15
81 | device 17
82 | EXPERIMENT 3 19
83 | cathode_material 18
84 | device 20
85 | EXPERIMENT 4 22
86 | cathode_material 21
87 | degradation_rate 23
88 | EXPERIMENT 5 36
89 | cathode_material 37
90 | electrolyte_material 38
91 | anode_material 39
92 | device 40
93 | EXPERIMENT 6 41
94 | working_temperature 42
95 | fuel_used 43
96 | time_of_operation 44
97 | EXPERIMENT 7 45
98 | working_temperature 46
99 | device 47
100 | EXPERIMENT 8 49
101 | device 48
102 | open_circuit_voltage 50
103 | EXPERIMENT 9 52
104 | device 51
105 | power_density 53
106 | cathode_material 54
107 | device 55
108 | power_density 56
109 | cathode_material 57
110 | power_density 58
111 | EXPERIMENT 10 59
112 | resistance 60
113 | cathode_material 61
114 | resistance 62
115 | cathode_material 63
116 | resistance 64
117 | EXPERIMENT 11 65
118 | resistance 66
119 | device 67
120 | resistance 68
121 | cathode_material 69
122 | device 70
123 | resistance 71
124 | cathode_material 72
125 | device 73
126 | LINK same_experiment 49 52
127 | LINK same_experiment 52 59
128 | LINK coreference 3 4
129 | LINK coreference 5 6
130 | LINK coreference 9 10
131 | LINK coreference 11 12
132 | LINK coreference 26 27
133 | LINK coreference 28 29
134 | LINK coreference 30 31
135 | LINK coreference 32 33
136 | LINK coreference 34 35
137 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5848893.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 536
2 | 2 0 537 573
3 | 3 0 574 1393
4 | 4 1 1394 1571
5 | 5 1 1572 1827
6 | 6 0 1828 2012
7 | 7 0 2013 2246
8 | 8 0 2247 2413
9 | 9 0 2414 2538
10 | 10 0 2539 2784
11 | 11 0 2785 2982
12 | 12 0 2983 3068
13 | 13 0 3069 3180
14 | 14 1 3181 3349
15 | 15 0 3350 3454
16 | 16 0 3455 3622
17 | 17 0 3623 3767
18 | 18 0 3768 3854
19 | 19 0 3855 3973
20 | 20 0 3974 4227
21 | 21 0 4228 4496
22 | 22 0 4497 4627
23 | 23 0 4628 4959
24 | 24 0 4960 5111
25 | 25 1 5112 5387
26 | 26 0 5388 5502
27 | 27 0 5503 5702
28 | 28 0 5703 5753
29 | 29 0 5754 5998
30 | 30 0 5999 6146
31 | 31 1 6147 6364
32 | 32 0 6365 6516
33 | 33 1 6517 6805
34 | 34 0 6806 6945
35 | 35 0 6946 7155
36 | 36 0 7156 7312
37 | 37 1 7313 7523
38 | 38 1 7524 7743
39 | 39 0 7744 7911
40 | 40 1 7912 8076
41 | 41 1 8077 8349
42 | 42 1 8350 8540
43 | 43 0 8541 8705
44 | 44 0 8706 8723
45 | 45 0 8724 8897
46 | 46 0 8898 9001
47 | 47 0 9002 9131
48 | 48 0 9132 9254
49 | 49 0 9255 9413
50 | 50 0 9414 9529
51 | 51 0 9530 9626
52 | 52 0 9627 9908
53 | 53 0 9909 10109
54 | 54 0 10110 10223
55 | 55 0 10224 10284
56 | 56 0 10285 10393
57 | 57 0 10394 10539
58 | 58 0 10540 10653
59 | 59 0 10654 10702
60 | 60 0 10703 11135
61 | 61 0 11136 11235
62 | 62 0 11236 11326
63 | 63 0 11327 11441
64 | 64 0 11442 11651
65 | 65 0 11652 11797
66 | 66 0 11798 11978
67 | 67 0 11979 12045
68 | 68 0 12046 12160
69 | 69 0 12161 12248
70 | 70 0 12249 12531
71 | 71 0 12532 12608
72 | 72 0 12609 12621
73 | 73 0 12622 12849
74 | 74 0 12850 12958
75 | 75 0 12959 13076
76 | 76 0 13077 13167
77 | 77 0 13168 13251
78 | 78 0 13252 13320
79 | 79 1 13321 13547
80 | 80 0 13548 13678
81 | 81 0 13679 13761
82 | 82 0 13762 13916
83 | 83 0 13917 14064
84 | 84 0 14065 14244
85 | 85 0 14245 14728
86 | 86 0 14729 14903
87 | 87 0 14904 15061
88 | 88 1 15062 15190
89 | 89 1 15191 15305
90 | 90 0 15306 15487
91 | 91 1 15488 15578
92 | 92 1 15579 15681
93 | 93 0 15682 15835
94 | 94 0 15836 16026
95 | 95 0 16027 16166
96 | 96 0 16167 16286
97 | 97 0 16287 16454
98 | 98 0 16455 16653
99 | 99 0 16654 16787
100 | 100 0 16788 16884
101 | 101 0 16885 16960
102 | 102 0 16961 17174
103 | 103 0 17175 17367
104 | 104 0 17368 17517
105 | 105 0 17518 17656
106 | 106 0 17657 17849
107 | 107 0 17850 18060
108 | 108 0 18061 18206
109 | 109 0 18207 18272
110 | 110 0 18273 18426
111 | 111 0 18427 18524
112 | 112 0 18525 18755
113 | 113 0 18756 18923
114 | 114 0 18924 19144
115 | 115 0 19145 19305
116 | 116 0 19306 19492
117 | 117 0 19493 19508
118 | 118 0 19509 19816
119 | 119 0 19817 20012
120 | 120 0 20013 20281
121 | 121 0 20282 20445
122 | 122 0 20446 20591
123 | 123 0 20592 20735
124 | 124 0 20736 20937
125 | 125 0 20938 21054
126 | 126 0 21055 21102
127 | 127 0 21103 21274
128 | 128 0 21275 21455
129 | 129 0 21456 21742
130 | 130 0 21743 21855
131 | 131 0 21856 21995
132 | 132 0 21996 22223
133 | 133 0 22224 22397
134 | 134 0 22398 22646
135 | 135 0 22647 22917
136 | 136 0 22918 23058
137 | 137 0 23059 23122
138 | 138 0 23123 23336
139 | 139 0 23337 23551
140 | 140 0 23552 23644
141 | 141 0 23645 23801
142 | 142 0 23802 24042
143 | 143 0 24043 24237
144 | 144 0 24238 24409
145 | 145 0 24410 24490
146 | 146 0 24491 24617
147 | 147 0 24618 24737
148 | 148 0 24738 24840
149 | 149 0 24841 24975
150 | 150 0 24976 25078
151 | 151 0 25079 25254
152 | 152 0 25255 25451
153 | 153 0 25452 25582
154 | 154 0 25583 25724
155 | 155 0 25725 26028
156 | 156 0 26029 26273
157 | 157 0 26274 26470
158 | 158 0 26471 26620
159 | 159 0 26621 26777
160 | 160 0 26778 27005
161 | 161 0 27006 27182
162 | 162 0 27183 27332
163 | 163 0 27333 27562
164 | 164 0 27563 27707
165 | 165 0 27708 27838
166 | 166 0 27839 27972
167 | 167 0 27973 28117
168 | 168 0 28118 28313
169 | 169 1 28314 28464
170 | 170 1 28465 28678
171 | 171 0 28679 28852
172 | 172 0 28853 29023
173 | 173 0 29024 29253
174 | 174 0 29254 29391
175 | 175 0 29392 29481
176 | 176 0 29482 29572
177 | 177 0 29573 29794
178 | 178 0 29795 30051
179 |
--------------------------------------------------------------------------------
/code/source/main_preprocess.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Heike Adel
4 | @author: Annemarie Friedrich
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU Affero General Public License as published
8 | by the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU Affero General Public License for more details.
15 | You should have received a copy of the GNU Affero General Public License
16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | """
18 |
19 | from argparse import ArgumentParser
20 |
21 | from preprocess import preprocess_embeddings
22 | from dataHandling.sofc_exp_utils import get_data_split_docids
23 | from dataHandling.dataLoader import get_vocab
24 |
25 | parser = ArgumentParser()
26 | # data location
27 | parser.add_argument('-corpus_dir', default='../sofc-exp-corpus', type=str)
28 | parser.add_argument('-corpus_meta_data_file', default='../sofc-exp-corpus/SOFC-Exp-Metadata.csv', type=str)
29 |
30 | # pretrained embedding location
31 | parser.add_argument('-embedding_file_word2vec', default='../data/embeddings/GoogleNews-vectors-negative300.bin')
32 | parser.add_argument('-embedding_file_mat2vec', default='../data/embeddings/pretrained_embeddings')
33 | parser.add_argument('-embedding_file_bpe', default='../data/embeddings/en.wiki.bpe.vs200000.d300.w2v.bin')
34 | parser.add_argument('-embedding_model_bpe', default='../data/embeddings/en.wiki.bpe.vs200000.model')
35 |
36 | # output location
37 | parser.add_argument('-output_word2index_file_mat2vec', default='../data/embeddings/word2index_mat2vec.pickle', type=str)
38 | parser.add_argument('-output_word2index_file_word2vec', default='../data/embeddings/word2index_word2vec.pickle', type=str)
39 | parser.add_argument('-output_word2index_file_bpe', default='../data/embeddings/word2index_bpe.pickle', type=str)
40 | parser.add_argument('-output_embedding_file_word2vec', default='../data/embeddings/word2vec.npy', type=str)
41 | parser.add_argument('-output_embedding_file_mat2vec', default='../data/embeddings/mat2vec.npy', type=str)
42 | parser.add_argument('-output_embedding_file_bpe', default='../data/embeddings/bpe.npy', type=str)
43 |
44 | args = parser.parse_args()
45 |
46 | print(args)
47 | # convert args to a dictionary
48 | options = vars(args)
49 |
50 | mat2vec_file = options['embedding_file_mat2vec']
51 | word2vec_file = options['embedding_file_word2vec']
52 | bpe_file = options['embedding_file_bpe']
53 |
54 | train_ids, dev_ids, test_ids = get_data_split_docids(options['corpus_meta_data_file'])
55 |
56 | data_vocabulary = get_vocab(options['corpus_dir'], train_ids + dev_ids + test_ids)
57 |
58 | mat2vec_word2index, mat2vec_matrix = preprocess_embeddings.get_embedding_weight_matrix(mat2vec_file, "mat2vec", data_vocabulary)
59 | word2vec_word2index, word2vec_matrix = preprocess_embeddings.get_embedding_weight_matrix(word2vec_file, "word2vec", data_vocabulary)
60 | bpe_word2index, bpe_matrix = preprocess_embeddings.get_bpe_embedding_weight_matrix(bpe_file, options["embedding_model_bpe"], data_vocabulary)
61 |
62 | preprocess_embeddings.save_word2index(mat2vec_word2index, options['output_word2index_file_mat2vec'])
63 | preprocess_embeddings.save_word2index(word2vec_word2index, options['output_word2index_file_word2vec'])
64 | preprocess_embeddings.save_word2index(bpe_word2index, options['output_word2index_file_bpe'])
65 |
66 | preprocess_embeddings.save_weight_matrix(mat2vec_matrix, options['output_embedding_file_mat2vec'])
67 | preprocess_embeddings.save_weight_matrix(word2vec_matrix, options['output_embedding_file_word2vec'])
68 | preprocess_embeddings.save_weight_matrix(bpe_matrix, options['output_embedding_file_bpe'])
69 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC5700654.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 DEVICE 9 83 88
2 | SPAN 2 MATERIAL 10 51 72
3 | SPAN 3 MATERIAL 10 74 78
4 | SPAN 4 MATERIAL 10 163 177
5 | SPAN 5 MATERIAL 10 179 182
6 | SPAN 6 MATERIAL 11 161 192
7 | SPAN 7 MATERIAL 11 194 199
8 | SPAN 8 DEVICE 88 2 6
9 | SPAN 9 MATERIAL 88 20 25
10 | SPAN 10 EXPERIMENT:current_exp 88 32 41
11 | SPAN 11 VALUE 88 72 83
12 | SPAN 12 VALUE 88 87 93
13 | SPAN 13 EXPERIMENT:current_exp 88 94 103
14 | SPAN 14 MATERIAL 88 107 109
15 | SPAN 15 EXPERIMENT:current_exp 88 117 120
16 | SPAN 16 MATERIAL 88 159 163
17 | SPAN 17 DEVICE 90 2 6
18 | SPAN 18 EXPERIMENT:current_exp 90 7 17
19 | SPAN 19 MATERIAL 90 29 46
20 | SPAN 20 MATERIAL 90 47 50
21 | SPAN 21 MATERIAL 90 64 67
22 | SPAN 22 MATERIAL 90 84 109
23 | SPAN 23 MATERIAL 90 112 115
24 | SPAN 24 VALUE 90 185 188
25 | SPAN 25 VALUE 90 190 193
26 | SPAN 26 VALUE 90 199 204
27 | SPAN 27 EXPERIMENT:current_exp 90 206 215
28 | SPAN 28 VALUE 90 224 227
29 | SPAN 29 VALUE 90 229 232
30 | SPAN 30 VALUE 90 234 237
31 | SPAN 31 VALUE 90 239 242
32 | SPAN 32 VALUE 90 248 259
33 | SPAN 33 EXPERIMENT:current_exp 90 260 269
34 | SPAN 34 MATERIAL 90 273 275
35 | SPAN 35 VALUE 90 284 287
36 | SPAN 36 VALUE 90 289 292
37 | SPAN 37 VALUE 90 294 297
38 | SPAN 38 VALUE 90 299 302
39 | SPAN 39 VALUE 90 308 314
40 | SPAN 40 DEVICE 94 38 43
41 | SPAN 41 MATERIAL 94 57 62
42 | SPAN 42 MATERIAL 94 68 86
43 | SPAN 43 EXPERIMENT:current_exp 94 94 98
44 | SPAN 44 VALUE 94 99 102
45 | SPAN 45 VALUE 94 107 118
46 | SPAN 46 EXPERIMENT:current_exp 94 119 128
47 | SPAN 47 MATERIAL 94 132 134
48 | SPAN 48 EXPERIMENT:current_exp 96 35 38
49 | SPAN 49 MATERIAL 96 39 56
50 | SPAN 50 DEVICE 96 74 79
51 | SPAN 51 MATERIAL 96 97 118
52 | SPAN 52 MATERIAL 96 120 138
53 | SPAN 53 MATERIAL 96 144 163
54 | SPAN 54 MATERIAL 96 164 167
55 | SPAN 55 EXPERIMENT:current_exp 96 175 179
56 | SPAN 56 VALUE 96 180 183
57 | SPAN 57 VALUE 96 185 188
58 | SPAN 58 VALUE 96 194 205
59 | SPAN 59 DEVICE 103 42 46
60 | SPAN 60 MATERIAL 103 64 81
61 | SPAN 61 MATERIAL 103 82 85
62 | SPAN 62 EXPERIMENT:current_exp 103 92 100
63 | SPAN 63 VALUE 103 120 125
64 | SPAN 64 VALUE 103 130 132
65 | SPAN 65 VALUE 103 137 141
66 | SPAN 66 MATERIAL 103 145 167
67 | SPAN 67 DEVICE 108 30 34
68 | SPAN 68 EXPERIMENT:current_exp 108 45 55
69 | SPAN 69 VALUE 108 59 65
70 | SPAN 70 VALUE 108 70 81
71 | EXPERIMENT 1 10
72 | device 8
73 | anode_material 9
74 | power_density 11
75 | working_temperature 12
76 | EXPERIMENT 2 13
77 | fuel_used 14
78 | EXPERIMENT 3 15
79 | anode_material 16
80 | EXPERIMENT 4 18
81 | device 17
82 | anode_material 19
83 | anode_material 20
84 | electrolyte_material 21
85 | cathode_material 22
86 | cathode_material 23
87 | EXPERIMENT 5 27
88 | power_density 28
89 | power_density 29
90 | power_density 30
91 | power_density 31
92 | power_density 32
93 | EXPERIMENT 6 33
94 | fuel_used 34
95 | working_temperature 35
96 | working_temperature 36
97 | working_temperature 37
98 | working_temperature 38
99 | working_temperature 39
100 | EXPERIMENT 7 43
101 | device 40
102 | anode_material 41
103 | anode_material 42
104 | power_density 44
105 | power_density 45
106 | EXPERIMENT 8 46
107 | fuel_used 47
108 | EXPERIMENT 9 48
109 | fuel_used 49
110 | EXPERIMENT 10 55
111 | device 50
112 | anode_material 51
113 | anode_material 52
114 | anode_material 53
115 | anode_material 54
116 | power_density 56
117 | power_density 57
118 | power_density 58
119 | EXPERIMENT 11 62
120 | device 59
121 | anode_material 60
122 | anode_material 61
123 | voltage 63
124 | time_of_operation 64
125 | time_of_operation 65
126 | fuel_used 66
127 | EXPERIMENT 12 68
128 | device 67
129 | voltage 69
130 | power_density 70
131 | LINK experiment_variation 15 10
132 | LINK same_experiment 10 13
133 | LINK thickness 20 24
134 | LINK thickness 21 25
135 | LINK thickness 22 26
136 | LINK same_experiment 18 27
137 | LINK same_experiment 27 33
138 | LINK same_experiment 43 46
139 | LINK same_experiment 48 55
140 | LINK coreference 2 3
141 | LINK coreference 4 5
142 | LINK coreference 6 7
143 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6461657.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1340
2 | 2 1 1341 1503
3 | 3 1 1504 1750
4 | 4 1 1751 1955
5 | 5 1 1956 2060
6 | 6 0 2061 2251
7 | 7 1 2252 2377
8 | 8 0 2378 2482
9 | 9 0 2483 2567
10 | 10 0 2568 2954
11 | 11 0 2955 3192
12 | 12 1 3193 3565
13 | 13 0 3566 3934
14 | 14 1 3935 4277
15 | 15 0 4278 4590
16 | 16 0 4591 4731
17 | 17 1 4732 5083
18 | 18 0 5084 5368
19 | 19 1 5369 5702
20 | 20 0 5703 5921
21 | 21 0 5922 6098
22 | 22 0 6099 6268
23 | 23 0 6269 6409
24 | 24 0 6410 6659
25 | 25 1 6660 6857
26 | 26 0 6858 7076
27 | 27 0 7077 7296
28 | 28 0 7297 7515
29 | 29 0 7516 7668
30 | 30 1 7669 7831
31 | 31 1 7832 8078
32 | 32 1 8079 8378
33 | 33 0 8379 8549
34 | 34 0 8550 8657
35 | 35 0 8658 8821
36 | 36 0 8822 9055
37 | 37 0 9056 9248
38 | 38 0 9249 9416
39 | 39 0 9417 9563
40 | 40 0 9564 9688
41 | 41 0 9689 9864
42 | 42 0 9865 10244
43 | 43 0 10245 10356
44 | 44 0 10357 10541
45 | 45 0 10542 10656
46 | 46 0 10657 11140
47 | 47 0 11141 11260
48 | 48 0 11261 11493
49 | 49 0 11494 11593
50 | 50 0 11594 11699
51 | 51 0 11700 11796
52 | 52 0 11797 12058
53 | 53 0 12059 12232
54 | 54 0 12233 12475
55 | 55 0 12476 12603
56 | 56 0 12604 12695
57 | 57 1 12696 12853
58 | 58 1 12854 13061
59 | 59 0 13062 13759
60 | 60 1 13760 13885
61 | 61 1 13886 14081
62 | 62 1 14082 14333
63 | 63 1 14334 14537
64 | 64 1 14538 14871
65 | 65 0 14872 15200
66 | 66 0 15201 15384
67 | 67 0 15385 15454
68 | 68 0 15455 15669
69 | 69 0 15670 15834
70 | 70 1 15835 15957
71 | 71 1 15958 16084
72 | 72 0 16085 16258
73 | 73 1 16259 16399
74 | 74 1 16400 16559
75 | 75 0 16560 16768
76 | 76 1 16769 16905
77 | 77 0 16906 17068
78 | 78 1 17069 17253
79 | 79 1 17254 17670
80 | 80 0 17671 17833
81 | 81 0 17834 18070
82 | 82 0 18071 18253
83 | 83 0 18254 18463
84 | 84 0 18464 18735
85 | 85 1 18736 18973
86 | 86 0 18974 19137
87 | 87 0 19138 19245
88 | 88 0 19246 19425
89 | 89 0 19426 19589
90 | 90 0 19590 19662
91 | 91 0 19663 19860
92 | 92 0 19861 20076
93 | 93 1 20077 20216
94 | 94 1 20217 20329
95 | 95 1 20330 20415
96 | 96 1 20416 20570
97 | 97 0 20571 20701
98 | 98 0 20702 20897
99 | 99 1 20898 21322
100 | 100 0 21323 21447
101 | 101 0 21448 21756
102 | 102 0 21757 21896
103 | 103 0 21897 22122
104 | 104 1 22123 22316
105 | 105 0 22317 22442
106 | 106 0 22443 22606
107 | 107 0 22607 22928
108 | 108 0 22929 23075
109 | 109 0 23076 23158
110 | 110 0 23159 23268
111 | 111 0 23269 23374
112 | 112 0 23375 23722
113 | 113 0 23723 23826
114 | 114 0 23827 24060
115 | 115 0 24061 24245
116 | 116 0 24246 24330
117 | 117 0 24331 24484
118 | 118 0 24485 24609
119 | 119 0 24610 24715
120 | 120 0 24716 24951
121 | 121 0 24952 25139
122 | 122 0 25140 25645
123 | 123 0 25646 25880
124 | 124 0 25881 26125
125 | 125 0 26126 26344
126 | 126 0 26345 26501
127 | 127 0 26502 26649
128 | 128 0 26650 26848
129 | 129 0 26849 27147
130 | 130 0 27148 27280
131 | 131 0 27281 27392
132 | 132 0 27393 27524
133 | 133 0 27525 27713
134 | 134 0 27714 27826
135 | 135 1 27827 27997
136 | 136 1 27998 28256
137 | 137 0 28257 28447
138 | 138 1 28448 28865
139 | 139 0 28866 29098
140 | 140 0 29099 29409
141 | 141 0 29410 29542
142 | 142 0 29543 29667
143 | 143 0 29668 29854
144 | 144 0 29855 29994
145 | 145 0 29995 30253
146 | 146 0 30254 30558
147 | 147 0 30559 30815
148 | 148 0 30816 31067
149 | 149 1 31068 31229
150 | 150 1 31230 31527
151 | 151 0 31528 31806
152 | 152 0 31807 31942
153 | 153 0 31943 32110
154 | 154 0 32111 32265
155 | 155 0 32266 32370
156 | 156 0 32371 32659
157 | 157 0 32660 32890
158 | 158 0 32891 33022
159 | 159 0 33023 33165
160 | 160 0 33166 33324
161 | 161 0 33325 33455
162 | 162 0 33456 33646
163 | 163 0 33647 33769
164 | 164 0 33770 34151
165 | 165 0 34152 34294
166 | 166 0 34295 34479
167 | 167 0 34480 34844
168 | 168 0 34845 34965
169 | 169 0 34966 35193
170 | 170 0 35194 35298
171 | 171 0 35299 35475
172 | 172 0 35476 35649
173 | 173 0 35650 35837
174 | 174 0 35838 36063
175 | 175 0 36064 36169
176 | 176 0 36170 36411
177 | 177 0 36412 36580
178 | 178 0 36581 36754
179 | 179 0 36755 36963
180 | 180 0 36964 37176
181 | 181 0 37177 37313
182 | 182 0 37314 37419
183 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6370853.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1458
2 | 2 0 1459 1570
3 | 3 0 1571 1782
4 | 4 0 1783 2078
5 | 5 0 2079 2260
6 | 6 0 2261 2378
7 | 7 0 2379 2787
8 | 8 0 2788 3183
9 | 9 0 3184 3259
10 | 10 0 3260 3399
11 | 11 0 3400 3509
12 | 12 0 3510 3764
13 | 13 0 3765 3942
14 | 14 0 3943 4108
15 | 15 0 4109 4357
16 | 16 0 4358 4481
17 | 17 0 4482 4724
18 | 18 0 4725 4911
19 | 19 0 4912 6806
20 | 20 0 6807 6810
21 | 21 0 6811 6879
22 | 22 0 6880 7078
23 | 23 0 7079 7192
24 | 24 0 7193 7426
25 | 25 0 7427 7617
26 | 26 0 7618 7843
27 | 27 0 7844 8027
28 | 28 0 8028 8274
29 | 29 0 8275 8531
30 | 30 0 8532 8735
31 | 31 0 8736 8840
32 | 32 0 8841 9068
33 | 33 0 9069 9131
34 | 34 0 9132 10162
35 | 35 0 10163 10294
36 | 36 0 10295 10515
37 | 37 0 10516 10609
38 | 38 0 10610 10853
39 | 39 0 10854 11047
40 | 40 0 11048 11158
41 | 41 0 11159 11263
42 | 42 0 11264 11358
43 | 43 0 11359 11532
44 | 44 0 11533 12074
45 | 45 0 12075 12306
46 | 46 0 12307 13322
47 | 47 0 13323 13384
48 | 48 0 13385 14362
49 | 49 0 14363 16246
50 | 50 0 16247 16390
51 | 51 0 16391 16574
52 | 52 0 16575 16686
53 | 53 0 16687 16753
54 | 54 0 16754 16908
55 | 55 0 16909 17001
56 | 56 0 17002 17131
57 | 57 0 17132 17278
58 | 58 0 17279 17379
59 | 59 0 17380 17572
60 | 60 0 17573 17711
61 | 61 0 17712 17937
62 | 62 0 17938 18308
63 | 63 0 18309 18368
64 | 64 0 18369 18457
65 | 65 0 18458 18596
66 | 66 0 18597 18768
67 | 67 0 18769 18985
68 | 68 0 18986 19164
69 | 69 0 19165 19452
70 | 70 0 19453 19623
71 | 71 0 19624 19772
72 | 72 0 19773 19907
73 | 73 0 19908 20285
74 | 74 0 20286 21011
75 | 75 0 21012 21192
76 | 76 0 21193 21486
77 | 77 0 21487 21668
78 | 78 0 21669 21798
79 | 79 0 21799 21960
80 | 80 0 21961 22146
81 | 81 0 22147 22325
82 | 82 0 22326 22504
83 | 83 0 22505 23211
84 | 84 0 23212 23318
85 | 85 0 23319 23545
86 | 86 0 23546 23646
87 | 87 0 23647 23861
88 | 88 0 23862 24001
89 | 89 0 24002 24150
90 | 90 0 24151 24388
91 | 91 0 24389 24510
92 | 92 0 24511 24713
93 | 93 0 24714 24804
94 | 94 0 24805 25083
95 | 95 0 25084 25213
96 | 96 0 25214 25386
97 | 97 0 25387 25606
98 | 98 0 25607 26585
99 | 99 0 26586 26756
100 | 100 0 26757 26905
101 | 101 0 26906 27120
102 | 102 0 27121 27191
103 | 103 0 27192 27280
104 | 104 1 27281 27522
105 | 105 1 27523 27684
106 | 106 1 27685 27859
107 | 107 0 27860 28082
108 | 108 0 28083 28391
109 | 109 0 28392 29008
110 | 110 0 29009 29198
111 | 111 1 29199 29530
112 | 112 0 29531 29610
113 | 113 0 29611 29812
114 | 114 0 29813 29938
115 | 115 0 29939 30098
116 | 116 1 30099 30350
117 | 117 0 30351 30528
118 | 118 0 30529 30749
119 | 119 0 30750 30933
120 | 120 0 30934 31127
121 | 121 0 31128 31312
122 | 122 0 31313 31467
123 | 123 0 31468 31674
124 | 124 0 31675 31840
125 | 125 0 31841 33050
126 | 126 0 33051 33099
127 | 127 0 33100 33303
128 | 128 0 33304 33481
129 | 129 0 33482 33821
130 | 130 1 33822 34055
131 | 131 0 34056 34192
132 | 132 0 34193 34493
133 | 133 0 34494 34663
134 | 134 0 34664 34906
135 | 135 0 34907 35106
136 | 136 0 35107 35182
137 | 137 0 35183 35280
138 | 138 0 35281 35446
139 | 139 0 35447 35500
140 | 140 0 35501 35672
141 | 141 0 35673 35751
142 | 142 0 35752 35857
143 | 143 0 35858 36073
144 | 144 0 36074 36284
145 | 145 0 36285 36352
146 | 146 0 36353 36432
147 | 147 0 36433 36633
148 | 148 0 36634 36716
149 | 149 0 36717 36785
150 | 150 0 36786 36865
151 | 151 0 36866 37080
152 | 152 0 37081 37244
153 | 153 0 37245 37325
154 | 154 0 37326 37499
155 | 155 0 37500 37623
156 | 156 0 37624 37740
157 | 157 0 37741 37827
158 | 158 0 37828 37957
159 | 159 0 37958 38120
160 | 160 0 38121 38253
161 | 161 0 38254 38392
162 | 162 0 38393 38507
163 | 163 0 38508 38640
164 | 164 0 38641 38702
165 | 165 0 38703 38806
166 | 166 0 38807 38891
167 | 167 0 38892 39024
168 | 168 0 39025 39210
169 | 169 0 39211 39355
170 | 170 0 39356 39475
171 | 171 0 39476 39597
172 | 172 0 39598 39770
173 | 173 0 39771 39838
174 | 174 0 39839 40056
175 | 175 0 40057 40228
176 | 176 0 40229 40371
177 | 177 0 40372 40495
178 | 178 0 40496 40672
179 | 179 0 40673 42251
180 | 180 0 42252 42288
181 | 181 0 42289 42425
182 | 182 0 42426 42486
183 | 183 0 42487 42592
184 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6247067.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1040
2 | 2 0 1041 1148
3 | 3 0 1149 1339
4 | 4 0 1340 1635
5 | 5 1 1636 1749
6 | 6 1 1750 1823
7 | 7 1 1824 2072
8 | 8 0 2073 2338
9 | 9 0 2339 2751
10 | 10 0 2752 2759
11 | 11 0 2760 2769
12 | 12 0 2770 2780
13 | 13 0 2781 2790
14 | 14 0 2791 2803
15 | 15 0 2804 2806
16 | 16 0 2807 2820
17 | 17 0 2821 2828
18 | 18 0 2829 2840
19 | 19 0 2841 3049
20 | 20 0 3050 3298
21 | 21 0 3299 3525
22 | 22 0 3526 3720
23 | 23 0 3721 3989
24 | 24 0 3990 4142
25 | 25 0 4143 4788
26 | 26 0 4789 5050
27 | 27 0 5051 5230
28 | 28 1 5231 6310
29 | 29 1 6311 7074
30 | 30 0 7075 7293
31 | 31 0 7294 7423
32 | 32 0 7424 7817
33 | 33 0 7818 7943
34 | 34 0 7944 8140
35 | 35 0 8141 8265
36 | 36 0 8266 8369
37 | 37 0 8370 8484
38 | 38 0 8485 8817
39 | 39 1 8818 9258
40 | 40 0 9259 9649
41 | 41 0 9650 9801
42 | 42 0 9802 9977
43 | 43 0 9978 10088
44 | 44 0 10089 10276
45 | 45 0 10277 10424
46 | 46 0 10425 10831
47 | 47 1 10832 11149
48 | 48 1 11150 11563
49 | 49 0 11564 11656
50 | 50 1 11657 11859
51 | 51 1 11860 12001
52 | 52 0 12002 12162
53 | 53 0 12163 12468
54 | 54 0 12469 12624
55 | 55 0 12625 12694
56 | 56 0 12695 12852
57 | 57 0 12853 12969
58 | 58 0 12970 13076
59 | 59 0 13077 13255
60 | 60 0 13256 13354
61 | 61 0 13355 13513
62 | 62 0 13514 13832
63 | 63 0 13833 14063
64 | 64 0 14064 14200
65 | 65 0 14201 14381
66 | 66 0 14382 14510
67 | 67 0 14511 14656
68 | 68 0 14657 14783
69 | 69 0 14784 14925
70 | 70 0 14926 15119
71 | 71 0 15120 15266
72 | 72 0 15267 15472
73 | 73 0 15473 15601
74 | 74 0 15602 15776
75 | 75 0 15777 15863
76 | 76 0 15864 15985
77 | 77 0 15986 16094
78 | 78 0 16095 16259
79 | 79 0 16260 16713
80 | 80 0 16714 16919
81 | 81 0 16920 17147
82 | 82 0 17148 17329
83 | 83 0 17330 17462
84 | 84 0 17463 17555
85 | 85 0 17556 17737
86 | 86 0 17738 17858
87 | 87 0 17859 18006
88 | 88 0 18007 18211
89 | 89 0 18212 18316
90 | 90 0 18317 18498
91 | 91 0 18499 18792
92 | 92 0 18793 18953
93 | 93 0 18954 20120
94 | 94 0 20121 20247
95 | 95 0 20248 20389
96 | 96 0 20390 20692
97 | 97 0 20693 20807
98 | 98 0 20808 20915
99 | 99 0 20916 21060
100 | 100 0 21061 21211
101 | 101 0 21212 21306
102 | 102 0 21307 21829
103 | 103 0 21830 21988
104 | 104 0 21989 22283
105 | 105 0 22284 22433
106 | 106 0 22434 22642
107 | 107 0 22643 22799
108 | 108 0 22800 22886
109 | 109 0 22887 23460
110 | 110 0 23461 23556
111 | 111 0 23557 23717
112 | 112 0 23718 23884
113 | 113 0 23885 23998
114 | 114 0 23999 24233
115 | 115 0 24234 24341
116 | 116 0 24342 24528
117 | 117 0 24529 24641
118 | 118 0 24642 24762
119 | 119 0 24763 24917
120 | 120 0 24918 25008
121 | 121 0 25009 25118
122 | 122 0 25119 25312
123 | 123 0 25313 25434
124 | 124 0 25435 25541
125 | 125 0 25542 25692
126 | 126 0 25693 25773
127 | 127 0 25774 25938
128 | 128 0 25939 26129
129 | 129 0 26130 26199
130 | 130 0 26200 26259
131 | 131 0 26260 26413
132 | 132 0 26414 26563
133 | 133 0 26564 27016
134 | 134 0 27017 27122
135 | 135 0 27123 27287
136 | 136 0 27288 27411
137 | 137 0 27412 27624
138 | 138 0 27625 27728
139 | 139 0 27729 27828
140 | 140 0 27829 28138
141 | 141 0 28139 28221
142 | 142 0 28222 28375
143 | 143 0 28376 28622
144 | 144 0 28623 28688
145 | 145 0 28689 28832
146 | 146 0 28833 29029
147 | 147 0 29030 29133
148 | 148 0 29134 29269
149 | 149 0 29270 29524
150 | 150 0 29525 29741
151 | 151 0 29742 30340
152 | 152 0 30341 30592
153 | 153 0 30593 30843
154 | 154 0 30844 30983
155 | 155 0 30984 31235
156 | 156 0 31236 31314
157 | 157 0 31315 31463
158 | 158 0 31464 31540
159 | 159 0 31541 31642
160 | 160 0 31643 31755
161 | 161 0 31756 32227
162 | 162 0 32228 32419
163 | 163 0 32420 32536
164 | 164 0 32537 32625
165 | 165 0 32626 32877
166 | 166 0 32878 33029
167 | 167 0 33030 33148
168 | 168 0 33149 33222
169 | 169 0 33223 33557
170 | 170 0 33558 33591
171 | 171 0 33592 33811
172 | 172 0 33812 33894
173 | 173 0 33895 33983
174 | 174 0 33984 34220
175 | 175 0 34221 34342
176 | 176 0 34343 34470
177 | 177 0 34471 34566
178 | 178 0 34567 34647
179 | 179 0 34648 34773
180 | 180 0 34774 34848
181 | 181 0 34849 35049
182 | 182 0 35050 35248
183 | 183 0 35249 35310
184 | 184 0 35311 35496
185 | 185 0 35497 35639
186 | 186 0 35640 35705
187 | 187 0 35706 35779
188 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC6247067.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 EXPERIMENT:current_exp 5 68 77
2 | SPAN 2 DEVICE 5 92 99
3 | SPAN 3 VALUE 5 100 112
4 | SPAN 4 VALUE 6 3 9
5 | SPAN 5 EXPERIMENT:current_exp 6 44 51
6 | SPAN 6 VALUE 6 52 63
7 | SPAN 7 VALUE 6 67 72
8 | SPAN 8 EXPERIMENT:current_exp 7 72 80
9 | SPAN 9 VALUE 7 126 131
10 | SPAN 10 VALUE 7 135 141
11 | SPAN 11 VALUE 7 151 155
12 | SPAN 12 MATERIAL 28 574 580
13 | SPAN 13 EXPERIMENT:general_info 28 591 595
14 | SPAN 14 MATERIAL 28 603 611
15 | SPAN 15 DEVICE 28 626 631
16 | SPAN 16 DEVICE 28 646 653
17 | SPAN 17 DEVICE 28 771 778
18 | SPAN 18 MATERIAL 28 958 967
19 | SPAN 19 MATERIAL 28 968 977
20 | SPAN 20 MATERIAL 28 982 991
21 | SPAN 21 EXPERIMENT:general_info 28 1002 1009
22 | SPAN 22 DEVICE 28 1038 1043
23 | SPAN 23 VALUE 28 1068 1078
24 | SPAN 24 MATERIAL 29 39 45
25 | SPAN 25 MATERIAL 29 50 56
26 | SPAN 26 MATERIAL 29 58 62
27 | SPAN 27 EXPERIMENT:general_info 29 64 69
28 | SPAN 28 MATERIAL 29 218 222
29 | SPAN 29 EXPERIMENT:previous_work 29 272 281
30 | SPAN 30 MATERIAL 29 308 335
31 | SPAN 31 MATERIAL 29 337 343
32 | SPAN 32 DEVICE 29 349 354
33 | SPAN 33 EXPERIMENT:previous_work 29 517 525
34 | SPAN 34 DEVICE 29 537 541
35 | SPAN 35 MATERIAL 29 548 554
36 | SPAN 36 MATERIAL 29 559 586
37 | SPAN 37 MATERIAL 29 588 593
38 | SPAN 38 EXPERIMENT:previous_work 39 168 176
39 | SPAN 39 DEVICE 39 222 227
40 | SPAN 40 EXPERIMENT:previous_work 39 229 239
41 | SPAN 41 VALUE 39 282 288
42 | SPAN 42 MATERIAL 46 81 106
43 | SPAN 43 MATERIAL 46 108 112
44 | SPAN 44 MATERIAL 46 117 129
45 | SPAN 45 MATERIAL 46 131 135
46 | SPAN 46 EXPERIMENT:previous_work 47 12 20
47 | SPAN 47 MATERIAL 47 37 63
48 | SPAN 48 MATERIAL 47 65 70
49 | SPAN 49 EXPERIMENT:previous_work 47 96 103
50 | SPAN 50 DEVICE 47 107 111
51 | SPAN 51 VALUE 47 146 157
52 | SPAN 52 VALUE 47 161 167
53 | SPAN 53 EXPERIMENT:previous_work 47 191 199
54 | SPAN 54 MATERIAL 47 218 245
55 | SPAN 55 MATERIAL 47 247 252
56 | SPAN 56 EXPERIMENT:previous_work 47 277 289
57 | SPAN 57 DEVICE 47 300 304
58 | SPAN 58 EXPERIMENT:current_exp 48 261 270
59 | SPAN 59 EXPERIMENT:current_exp 48 336 346
60 | SPAN 60 MATERIAL 48 357 362
61 | SPAN 61 DEVICE 50 4 8
62 | SPAN 62 EXPERIMENT:current_exp 50 9 19
63 | SPAN 63 MATERIAL 50 30 35
64 | SPAN 64 MATERIAL 50 61 67
65 | SPAN 65 MATERIAL 50 85 94
66 | SPAN 66 EXPERIMENT:current_exp 50 118 130
67 | SPAN 67 VALUE 50 189 201
68 | SPAN 68 EXPERIMENT:current_exp 51 31 40
69 | SPAN 69 MATERIAL 51 44 54
70 | SPAN 70 MATERIAL 51 95 107
71 | EXPERIMENT 1 1
72 | device 2
73 | working_temperature 3
74 | EXPERIMENT 2 5
75 | working_temperature 4
76 | current_density 6
77 | voltage 7
78 | EXPERIMENT 3 8
79 | voltage 9
80 | working_temperature 10
81 | time_of_operation 11
82 | EXPERIMENT 4 13
83 | anode_material 12
84 | fuel_used 14
85 | device 15
86 | EXPERIMENT 5 21
87 | electrolyte_material 18
88 | electrolyte_material 19
89 | electrolyte_material 20
90 | device 22
91 | working_temperature 23
92 | EXPERIMENT 6 27
93 | electrolyte_material 24
94 | electrolyte_material 25
95 | EXPERIMENT 7 29
96 | anode_material 30
97 | device 32
98 | EXPERIMENT 8 33
99 | device 34
100 | electrolyte_material 35
101 | cathode_material 36
102 | EXPERIMENT 9 38
103 | device 39
104 | EXPERIMENT 10 40
105 | working_temperature 41
106 | EXPERIMENT 11 46
107 | cathode_material 47
108 | EXPERIMENT 12 49
109 | device 50
110 | power_density 51
111 | working_temperature 52
112 | EXPERIMENT 13 53
113 | cathode_material 54
114 | EXPERIMENT 14 56
115 | device 57
116 | EXPERIMENT 15 59
117 | anode_material 60
118 | EXPERIMENT 16 62
119 | device 61
120 | cathode_material 63
121 | electrolyte_material 64
122 | cathode_material 65
123 | EXPERIMENT 17 66
124 | working_temperature 67
125 | EXPERIMENT 18 68
126 | fuel_used 69
127 | fuel_used 70
128 | LINK experiment_variation 5 1
129 | LINK experiment_variation 27 21
130 | LINK same_experiment 38 40
131 | LINK same_experiment 46 49
132 | LINK same_experiment 53 56
133 | LINK same_experiment 58 59
134 | LINK same_experiment 62 66
135 | LINK coreference 25 26
136 | LINK coreference 30 31
137 | LINK coreference 36 37
138 | LINK coreference 42 43
139 | LINK coreference 44 45
140 | LINK coreference 47 48
141 | LINK coreference 54 55
142 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5457058.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 585
2 | 2 0 586 1044
3 | 3 1 1045 1167
4 | 4 0 1168 1318
5 | 5 0 1319 1581
6 | 6 0 1582 1699
7 | 7 1 1700 1830
8 | 8 1 1831 2014
9 | 9 1 2015 2232
10 | 10 0 2233 2360
11 | 11 0 2361 2415
12 | 12 0 2416 2610
13 | 13 0 2611 2755
14 | 14 0 2756 2857
15 | 15 0 2858 3202
16 | 16 0 3203 3481
17 | 17 0 3482 3603
18 | 18 0 3604 3771
19 | 19 0 3772 3902
20 | 20 0 3903 4228
21 | 21 0 4229 4463
22 | 22 1 4464 4748
23 | 23 0 4749 4965
24 | 24 0 4966 5189
25 | 25 0 5190 5290
26 | 26 0 5291 5467
27 | 27 0 5468 5503
28 | 28 0 5504 5594
29 | 29 0 5595 5728
30 | 30 0 5729 6111
31 | 31 0 6112 6328
32 | 32 0 6329 6587
33 | 33 0 6588 6720
34 | 34 0 6721 6787
35 | 35 0 6788 6831
36 | 36 0 6832 6935
37 | 37 0 6936 7005
38 | 38 0 7006 7313
39 | 39 0 7314 7381
40 | 40 0 7382 7483
41 | 41 0 7484 7603
42 | 42 0 7604 7759
43 | 43 0 7760 7897
44 | 44 0 7898 8071
45 | 45 0 8072 8222
46 | 46 0 8223 8339
47 | 47 0 8340 8538
48 | 48 0 8539 8630
49 | 49 0 8631 8694
50 | 50 0 8695 8878
51 | 51 0 8879 8942
52 | 52 0 8943 8986
53 | 53 0 8987 9165
54 | 54 0 9166 9240
55 | 55 0 9241 9357
56 | 56 0 9358 9483
57 | 57 0 9484 9636
58 | 58 0 9637 9848
59 | 59 0 9849 9986
60 | 60 0 9987 10149
61 | 61 0 10150 10251
62 | 62 0 10252 10370
63 | 63 0 10371 10506
64 | 64 1 10507 10753
65 | 65 1 10754 10947
66 | 66 0 10948 11081
67 | 67 0 11082 11211
68 | 68 0 11212 11349
69 | 69 1 11350 11492
70 | 70 0 11493 11569
71 | 71 0 11570 11759
72 | 72 0 11760 11852
73 | 73 0 11853 11953
74 | 74 0 11954 12021
75 | 75 0 12022 12167
76 | 76 1 12168 12322
77 | 77 0 12323 12535
78 | 78 0 12536 12653
79 | 79 0 12654 12681
80 | 80 0 12682 12835
81 | 81 0 12836 12869
82 | 82 0 12870 13049
83 | 83 0 13050 13148
84 | 84 0 13149 13418
85 | 85 0 13419 13516
86 | 86 0 13517 13607
87 | 87 0 13608 13764
88 | 88 0 13765 13920
89 | 89 0 13921 14154
90 | 90 0 14155 14285
91 | 91 0 14286 14362
92 | 92 0 14363 14413
93 | 93 0 14414 14641
94 | 94 0 14642 14707
95 | 95 0 14708 14778
96 | 96 0 14779 14944
97 | 97 0 14945 15258
98 | 98 0 15259 15520
99 | 99 0 15521 15633
100 | 100 0 15634 15957
101 | 101 0 15958 16183
102 | 102 0 16184 16402
103 | 103 0 16403 16493
104 | 104 0 16494 16879
105 | 105 0 16880 16987
106 | 106 0 16988 17105
107 | 107 0 17106 17175
108 | 108 0 17176 17310
109 | 109 0 17311 17450
110 | 110 0 17451 17527
111 | 111 0 17528 17716
112 | 112 0 17717 17837
113 | 113 0 17838 18021
114 | 114 0 18022 18288
115 | 115 0 18289 18657
116 | 116 0 18658 18867
117 | 117 0 18868 19022
118 | 118 0 19023 19201
119 | 119 0 19202 19407
120 | 120 0 19408 19551
121 | 121 0 19552 19694
122 | 122 0 19695 19862
123 | 123 0 19863 19915
124 | 124 0 19916 20030
125 | 125 0 20031 20139
126 | 126 0 20140 20175
127 | 127 0 20176 20293
128 | 128 0 20294 20362
129 | 129 0 20363 20503
130 | 130 0 20504 20705
131 | 131 0 20706 20781
132 | 132 0 20782 20809
133 | 133 0 20810 21061
134 | 134 0 21062 21265
135 | 135 0 21266 21485
136 | 136 0 21486 21669
137 | 137 0 21670 21740
138 | 138 0 21741 21938
139 | 139 0 21939 22172
140 | 140 0 22173 22510
141 | 141 0 22511 22742
142 | 142 0 22743 22865
143 | 143 0 22866 23176
144 | 144 1 23177 23374
145 | 145 1 23375 23472
146 | 146 0 23473 23699
147 | 147 0 23700 23779
148 | 148 0 23780 23883
149 | 149 0 23884 23957
150 | 150 0 23958 24059
151 | 151 0 24060 24186
152 | 152 0 24187 24274
153 | 153 0 24275 24347
154 | 154 0 24348 24776
155 | 155 0 24777 24858
156 | 156 0 24859 24922
157 | 157 0 24923 25088
158 | 158 0 25089 25209
159 | 159 0 25210 25291
160 | 160 0 25292 25356
161 | 161 0 25357 25436
162 | 162 0 25437 25510
163 | 163 0 25511 25610
164 | 164 0 25611 25807
165 | 165 0 25808 25947
166 | 166 0 25948 26093
167 | 167 0 26094 26216
168 | 168 0 26217 26441
169 | 169 0 26442 26725
170 | 170 1 26726 26804
171 | 171 0 26805 26853
172 | 172 0 26854 27088
173 | 173 1 27089 27242
174 | 174 0 27243 27377
175 | 175 0 27378 27579
176 | 176 0 27580 27796
177 | 177 0 27797 27935
178 | 178 0 27936 28026
179 | 179 0 28027 28142
180 | 180 1 28143 28354
181 | 181 1 28355 28493
182 | 182 0 28494 28552
183 | 183 1 28553 28667
184 | 184 0 28668 28810
185 | 185 0 28811 29132
186 | 186 0 29133 29602
187 | 187 0 29603 29763
188 | 188 1 29764 30107
189 | 189 1 30108 30339
190 | 190 1 30340 30474
191 | 191 0 30475 30676
192 | 192 0 30677 30831
193 | 193 0 30832 30971
194 | 194 0 30972 31096
195 | 195 1 31097 31299
196 | 196 0 31300 31534
197 | 197 1 31535 31678
198 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6517467.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 1021
2 | 2 0 1022 1286
3 | 3 1 1287 1589
4 | 4 1 1590 1675
5 | 5 0 1676 1805
6 | 6 0 1806 1962
7 | 7 0 1963 2064
8 | 8 0 2065 2234
9 | 9 0 2235 2648
10 | 10 0 2649 2934
11 | 11 0 2935 3107
12 | 12 1 3108 3267
13 | 13 0 3268 3393
14 | 14 0 3394 3622
15 | 15 0 3623 3756
16 | 16 0 3757 3910
17 | 17 0 3911 4201
18 | 18 0 4202 4357
19 | 19 0 4358 4542
20 | 20 0 4543 4662
21 | 21 1 4663 4911
22 | 22 0 4912 5028
23 | 23 0 5029 5188
24 | 24 0 5189 5344
25 | 25 0 5345 5566
26 | 26 0 5567 5697
27 | 27 0 5698 5877
28 | 28 0 5878 5975
29 | 29 0 5976 6090
30 | 30 0 6091 6206
31 | 31 0 6207 6336
32 | 32 0 6337 6501
33 | 33 0 6502 6750
34 | 34 0 6751 6900
35 | 35 0 6901 7079
36 | 36 0 7080 7221
37 | 37 0 7222 7305
38 | 38 0 7306 7396
39 | 39 0 7397 7494
40 | 40 0 7495 7595
41 | 41 0 7596 7686
42 | 42 0 7687 7850
43 | 43 0 7851 8077
44 | 44 0 8078 8260
45 | 45 0 8261 8492
46 | 46 0 8493 8578
47 | 47 0 8579 8711
48 | 48 0 8712 8786
49 | 49 0 8787 8920
50 | 50 0 8921 9065
51 | 51 0 9066 9323
52 | 52 0 9324 9531
53 | 53 0 9532 9583
54 | 54 0 9584 10178
55 | 55 0 10179 10274
56 | 56 0 10275 10573
57 | 57 0 10574 10695
58 | 58 0 10696 10800
59 | 59 0 10801 10989
60 | 60 0 10990 11322
61 | 61 0 11323 11600
62 | 62 0 11601 11839
63 | 63 0 11840 11955
64 | 64 0 11956 12203
65 | 65 0 12204 12394
66 | 66 0 12395 12700
67 | 67 0 12701 12870
68 | 68 0 12871 13061
69 | 69 0 13062 13147
70 | 70 0 13148 13366
71 | 71 0 13367 13489
72 | 72 0 13490 13617
73 | 73 0 13618 13799
74 | 74 0 13800 13920
75 | 75 0 13921 14020
76 | 76 0 14021 14147
77 | 77 0 14148 14304
78 | 78 0 14305 14497
79 | 79 0 14498 14705
80 | 80 0 14706 14839
81 | 81 0 14840 15076
82 | 82 0 15077 15379
83 | 83 0 15380 15449
84 | 84 0 15450 15684
85 | 85 0 15685 15988
86 | 86 0 15989 16195
87 | 87 0 16196 16338
88 | 88 0 16339 16461
89 | 89 0 16462 16631
90 | 90 0 16632 16724
91 | 91 0 16725 16926
92 | 92 1 16927 17045
93 | 93 1 17046 17128
94 | 94 0 17129 17259
95 | 95 1 17260 17438
96 | 96 1 17439 17594
97 | 97 0 17595 17804
98 | 98 0 17805 18003
99 | 99 0 18004 18212
100 | 100 0 18213 18261
101 | 101 0 18262 18405
102 | 102 1 18406 18608
103 | 103 0 18609 18786
104 | 104 0 18787 18996
105 | 105 0 18997 19201
106 | 106 0 19202 19364
107 | 107 0 19365 19635
108 | 108 0 19636 19868
109 | 109 1 19869 20064
110 | 110 0 20065 20367
111 | 111 0 20368 20502
112 | 112 0 20503 20625
113 | 113 0 20626 20841
114 | 114 0 20842 20978
115 | 115 0 20979 21437
116 | 116 0 21438 21539
117 | 117 0 21540 21655
118 | 118 0 21656 21828
119 | 119 0 21829 21975
120 | 120 0 21976 22218
121 | 121 1 22219 22333
122 | 122 0 22334 22534
123 | 123 0 22535 22683
124 | 124 1 22684 22919
125 | 125 1 22920 23021
126 | 126 0 23022 23255
127 | 127 0 23256 23314
128 | 128 0 23315 23373
129 | 129 0 23374 23588
130 | 130 0 23589 23658
131 | 131 0 23659 24012
132 | 132 0 24013 24530
133 | 133 0 24531 24855
134 | 134 0 24856 24893
135 | 135 0 24894 25028
136 | 136 0 25029 25071
137 | 137 0 25072 25387
138 | 138 0 25388 25599
139 | 139 0 25600 25776
140 | 140 0 25777 25927
141 | 141 0 25928 26148
142 | 142 0 26149 26242
143 | 143 0 26243 26835
144 | 144 0 26836 27110
145 | 145 0 27111 27617
146 | 146 0 27618 27697
147 | 147 0 27698 27861
148 | 148 0 27862 27998
149 | 149 0 27999 28301
150 | 150 0 28302 28437
151 | 151 0 28438 28705
152 | 152 0 28706 28888
153 | 153 0 28889 29147
154 | 154 0 29148 29233
155 | 155 0 29234 29360
156 | 156 1 29361 29529
157 | 157 0 29530 29761
158 | 158 0 29762 29908
159 | 159 0 29909 30035
160 | 160 0 30036 30201
161 | 161 0 30202 30554
162 | 162 0 30555 30812
163 | 163 0 30813 30951
164 | 164 0 30952 31105
165 | 165 0 31106 31259
166 | 166 0 31260 31377
167 | 167 0 31378 31515
168 | 168 0 31516 31636
169 | 169 0 31637 31843
170 | 170 0 31844 31936
171 | 171 0 31937 32054
172 | 172 0 32055 32159
173 | 173 0 32160 32353
174 | 174 1 32354 32572
175 | 175 1 32573 32786
176 | 176 0 32787 32954
177 | 177 0 32955 33124
178 | 178 0 33125 33225
179 | 179 0 33226 33374
180 | 180 0 33375 33420
181 | 181 0 33421 33477
182 | 182 0 33478 33488
183 | 183 0 33489 33545
184 | 184 0 33546 33556
185 | 185 0 33557 33613
186 | 186 0 33614 33624
187 | 187 0 33625 33675
188 | 188 0 33676 33686
189 | 189 0 33687 33752
190 | 190 0 33753 34428
191 | 191 0 34429 34989
192 | 192 0 34990 35073
193 | 193 0 35074 35125
194 | 194 0 35126 35178
195 | 195 0 35179 35242
196 | 196 0 35243 35321
197 | 197 0 35322 35457
198 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC5456601.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 DEVICE 11 39 61
2 | SPAN 2 DEVICE 11 63 68
3 | SPAN 3 EXPERIMENT:general_info 11 71 76
4 | SPAN 4 MATERIAL 11 110 117
5 | SPAN 5 EXPERIMENT:general_info 11 132 139
6 | SPAN 6 VALUE 11 162 179
7 | SPAN 7 EXPERIMENT:current_exp 11 221 224
8 | SPAN 8 MATERIAL 11 238 249
9 | SPAN 9 MATERIAL 16 21 41
10 | SPAN 10 MATERIAL 16 43 46
11 | SPAN 11 DEVICE 16 71 75
12 | SPAN 12 DEVICE 17 49 54
13 | SPAN 13 EXPERIMENT:general_info 17 55 63
14 | SPAN 14 VALUE 17 67 74
15 | SPAN 15 MATERIAL 58 187 193
16 | SPAN 16 MATERIAL 58 195 201
17 | SPAN 17 MATERIAL 58 206 212
18 | SPAN 18 MATERIAL 59 41 55
19 | SPAN 19 MATERIAL 59 59 73
20 | SPAN 20 EXPERIMENT:previous_work 61 20 28
21 | SPAN 21 EXPERIMENT:previous_work 61 107 114
22 | SPAN 22 MATERIAL 61 120 144
23 | SPAN 23 MATERIAL 61 146 150
24 | SPAN 24 EXPERIMENT:previous_work 61 152 157
25 | SPAN 25 MATERIAL 61 216 240
26 | SPAN 26 MATERIAL 61 242 246
27 | SPAN 27 EXPERIMENT:general_info 61 264 268
28 | SPAN 28 DEVICE 61 298 302
29 | SPAN 29 MATERIAL 63 54 70
30 | SPAN 30 EXPERIMENT:general_info 64 38 42
31 | SPAN 31 DEVICE 64 47 51
32 | SPAN 32 EXPERIMENT:general_info 64 67 74
33 | SPAN 33 MATERIAL 64 75 99
34 | SPAN 34 MATERIAL 64 101 105
35 | SPAN 35 MATERIAL 66 97 128
36 | SPAN 36 MATERIAL 66 144 171
37 | SPAN 37 EXPERIMENT:previous_work 68 125 133
38 | SPAN 38 MATERIAL 68 159 171
39 | SPAN 39 MATERIAL 68 181 192
40 | SPAN 40 MATERIAL 69 79 82
41 | SPAN 41 EXPERIMENT:previous_work 69 93 105
42 | SPAN 42 MATERIAL 72 41 54
43 | SPAN 43 MATERIAL 72 59 73
44 | SPAN 44 DEVICE 74 49 65
45 | SPAN 45 DEVICE 75 8 12
46 | SPAN 46 MATERIAL 75 16 21
47 | SPAN 47 MATERIAL 75 48 61
48 | SPAN 48 MATERIAL 75 76 98
49 | SPAN 49 MATERIAL 75 100 104
50 | SPAN 50 VALUE 75 125 131
51 | SPAN 51 MATERIAL 75 155 169
52 | SPAN 52 EXPERIMENT:current_exp 75 185 189
53 | SPAN 53 EXPERIMENT:current_exp 76 144 146
54 | SPAN 54 VALUE 76 177 183
55 | SPAN 55 MATERIAL 77 31 35
56 | SPAN 56 VALUE 77 84 91
57 | SPAN 57 VALUE 77 95 100
58 | SPAN 58 EXPERIMENT:current_exp 77 104 112
59 | SPAN 59 MATERIAL 95 250 256
60 | SPAN 60 MATERIAL 95 261 267
61 | SPAN 61 MATERIAL 100 30 44
62 | SPAN 62 MATERIAL 101 29 51
63 | SPAN 63 MATERIAL 176 291 329
64 | SPAN 64 MATERIAL 193 125 141
65 | SPAN 65 MATERIAL 194 38 62
66 | SPAN 66 MATERIAL 194 64 68
67 | SPAN 67 EXPERIMENT:general_info 194 77 82
68 | SPAN 68 EXPERIMENT:general_info 194 208 212
69 | SPAN 69 DEVICE 194 233 263
70 | SPAN 70 MATERIAL 198 30 42
71 | SPAN 71 MATERIAL 214 110 124
72 | SPAN 72 MATERIAL 214 252 261
73 | SPAN 73 MATERIAL 215 157 180
74 | SPAN 74 MATERIAL 215 182 186
75 | SPAN 75 MATERIAL 215 192 205
76 | SPAN 76 MATERIAL 215 207 210
77 | SPAN 77 MATERIAL 223 75 79
78 | SPAN 78 MATERIAL 235 51 62
79 | SPAN 79 MATERIAL 235 64 68
80 | SPAN 80 MATERIAL 235 74 83
81 | SPAN 81 MATERIAL 235 85 88
82 | EXPERIMENT 1 3
83 | device 1
84 | electrolyte_material 4
85 | EXPERIMENT 2 5
86 | working_temperature 6
87 | EXPERIMENT 3 7
88 | fuel_used 8
89 | EXPERIMENT 4 13
90 | device 12
91 | working_temperature 14
92 | EXPERIMENT 5 24
93 | cathode_material 22
94 | EXPERIMENT 6 27
95 | cathode_material 25
96 | device 28
97 | EXPERIMENT 7 30
98 | device 31
99 | EXPERIMENT 8 32
100 | cathode_material 33
101 | EXPERIMENT 9 37
102 | cathode_material 38
103 | cathode_material 39
104 | EXPERIMENT 10 41
105 | cathode_material 40
106 | EXPERIMENT 11 52
107 | device 45
108 | support_material 46
109 | interlayer_material 47
110 | electrolyte_material 48
111 | cathode_material 51
112 | EXPERIMENT 12 53
113 | open_circuit_voltage 54
114 | EXPERIMENT 13 58
115 | electrolyte_material 55
116 | power_density 56
117 | working_temperature 57
118 | EXPERIMENT 14 67
119 | cathode_material 65
120 | EXPERIMENT 15 68
121 | device 69
122 | LINK same_experiment 3 5
123 | LINK same_experiment 5 7
124 | LINK same_experiment 20 21
125 | LINK experiment_variation 27 24
126 | LINK same_experiment 21 24
127 | LINK same_experiment 30 32
128 | LINK thickness 48 50
129 | LINK same_experiment 52 53
130 | LINK same_experiment 53 58
131 | LINK same_experiment 67 68
132 | LINK coreference 1 2
133 | LINK coreference 9 10
134 | LINK coreference 22 23
135 | LINK coreference 25 26
136 | LINK coreference 33 34
137 | LINK coreference 44 45
138 | LINK coreference 48 49
139 | LINK coreference 65 66
140 | LINK coreference 73 74
141 | LINK coreference 75 76
142 | LINK coreference 78 79
143 | LINK coreference 80 81
144 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC6249295.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 MATERIAL 4 32 58
2 | SPAN 2 MATERIAL 4 60 65
3 | SPAN 3 EXPERIMENT:current_exp 4 71 75
4 | SPAN 4 DEVICE 4 112 116
5 | SPAN 5 DEVICE 5 41 45
6 | SPAN 6 EXPERIMENT:current_exp 5 46 51
7 | SPAN 7 VALUE 5 93 104
8 | SPAN 8 VALUE 5 108 114
9 | SPAN 9 VALUE 5 138 143
10 | SPAN 10 VALUE 6 56 68
11 | SPAN 11 VALUE 6 89 101
12 | SPAN 12 EXPERIMENT:current_exp 6 139 147
13 | SPAN 13 VALUE 6 169 174
14 | SPAN 14 VALUE 6 178 184
15 | SPAN 15 DEVICE 35 69 73
16 | SPAN 16 EXPERIMENT:current_exp 35 79 89
17 | SPAN 17 MATERIAL 35 142 150
18 | SPAN 18 MATERIAL 35 173 199
19 | SPAN 19 MATERIAL 35 201 206
20 | SPAN 20 MATERIAL 35 208 212
21 | SPAN 21 DEVICE 71 22 26
22 | SPAN 22 EXPERIMENT:current_exp 71 31 39
23 | SPAN 23 VALUE 71 97 102
24 | SPAN 24 VALUE 71 104 109
25 | SPAN 25 VALUE 71 115 126
26 | SPAN 26 VALUE 71 130 133
27 | SPAN 27 VALUE 71 135 138
28 | SPAN 28 VALUE 71 140 146
29 | SPAN 29 EXPERIMENT:current_exp 72 63 73
30 | SPAN 30 VALUE 72 142 147
31 | SPAN 31 VALUE 72 149 154
32 | SPAN 32 VALUE 72 160 171
33 | SPAN 33 VALUE 72 175 178
34 | SPAN 34 VALUE 72 180 183
35 | SPAN 35 VALUE 72 185 191
36 | SPAN 36 MATERIAL 76 30 40
37 | SPAN 37 EXPERIMENT:current_exp 76 41 45
38 | SPAN 38 VALUE 76 46 51
39 | SPAN 39 VALUE 76 53 58
40 | SPAN 40 VALUE 76 64 75
41 | SPAN 41 VALUE 76 79 82
42 | SPAN 42 VALUE 76 84 87
43 | SPAN 43 VALUE 76 93 99
44 | SPAN 44 EXPERIMENT:current_exp 81 10 15
45 | SPAN 45 DEVICE 81 99 103
46 | SPAN 46 MATERIAL 81 120 122
47 | SPAN 47 MATERIAL 81 135 138
48 | SPAN 48 EXPERIMENT:current_exp 82 68 76
49 | SPAN 49 VALUE 82 118 129
50 | SPAN 50 VALUE 82 133 139
51 | SPAN 51 DEVICE 82 234 238
52 | SPAN 52 MATERIAL 83 61 66
53 | SPAN 53 DEVICE 87 59 63
54 | SPAN 54 EXPERIMENT:current_exp 87 64 69
55 | SPAN 55 VALUE 87 116 128
56 | SPAN 56 VALUE 87 149 161
57 | SPAN 57 VALUE 87 215 220
58 | SPAN 58 VALUE 87 224 230
59 | SPAN 59 EXPERIMENT:current_exp 91 66 74
60 | SPAN 60 DEVICE 91 81 85
61 | SPAN 61 VALUE 91 150 156
62 | SPAN 62 MATERIAL 96 168 194
63 | SPAN 63 MATERIAL 96 196 201
64 | SPAN 64 MATERIAL 97 4 9
65 | SPAN 65 MATERIAL 97 40 66
66 | SPAN 66 MATERIAL 97 68 75
67 | SPAN 67 EXPERIMENT:current_exp 98 26 34
68 | SPAN 68 DEVICE 98 42 46
69 | SPAN 69 DEVICE 98 51 55
70 | SPAN 70 DEVICE 99 3 7
71 | SPAN 71 DEVICE 99 32 36
72 | SPAN 72 EXPERIMENT:current_exp 99 37 42
73 | SPAN 73 VALUE 99 73 84
74 | SPAN 74 VALUE 99 88 94
75 | SPAN 75 VALUE 99 164 169
76 | SPAN 76 DEVICE 100 3 7
77 | SPAN 77 EXPERIMENT:current_exp 100 36 41
78 | SPAN 78 VALUE 100 84 96
79 | SPAN 79 VALUE 100 131 136
80 | SPAN 80 VALUE 100 140 146
81 | EXPERIMENT 1 3
82 | cathode_material 1
83 | device 4
84 | EXPERIMENT 2 6
85 | device 5
86 | power_density 7
87 | working_temperature 8
88 | time_of_operation 9
89 | EXPERIMENT 3 12
90 | current_density 10
91 | current_density 11
92 | voltage 13
93 | working_temperature 14
94 | EXPERIMENT 4 16
95 | device 15
96 | fuel_used 17
97 | cathode_material 18
98 | cathode_material 20
99 | EXPERIMENT 5 22
100 | device 21
101 | resistance 23
102 | resistance 24
103 | resistance 25
104 | working_temperature 26
105 | working_temperature 27
106 | working_temperature 28
107 | EXPERIMENT 6 29
108 | resistance 30
109 | resistance 31
110 | resistance 32
111 | working_temperature 33
112 | working_temperature 34
113 | working_temperature 35
114 | EXPERIMENT 7 37
115 | cathode_material 36
116 | resistance 38
117 | resistance 39
118 | resistance 40
119 | working_temperature 41
120 | working_temperature 42
121 | working_temperature 43
122 | EXPERIMENT 8 44
123 | device 45
124 | fuel_used 46
125 | fuel_used 47
126 | EXPERIMENT 9 48
127 | power_density 49
128 | working_temperature 50
129 | device 51
130 | EXPERIMENT 10 54
131 | device 53
132 | current_density 55
133 | current_density 56
134 | voltage 57
135 | working_temperature 58
136 | EXPERIMENT 11 59
137 | device 60
138 | time_of_operation 61
139 | EXPERIMENT 12 67
140 | device 68
141 | device 69
142 | EXPERIMENT 13 72
143 | device 70
144 | device 71
145 | power_density 73
146 | working_temperature 74
147 | time_of_operation 75
148 | EXPERIMENT 14 77
149 | device 76
150 | current_density 78
151 | voltage 79
152 | working_temperature 80
153 | LINK experiment_variation 6 3
154 | LINK experiment_variation 12 6
155 | LINK same_experiment 44 48
156 | LINK experiment_variation 72 67
157 | LINK experiment_variation 77 72
158 | LINK coreference 1 2
159 | LINK coreference 18 19
160 | LINK coreference 62 63
161 | LINK coreference 65 66
162 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC6427619.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 MATERIAL 2 65 90
2 | SPAN 2 DEVICE 3 44 53
3 | SPAN 3 EXPERIMENT:current_exp 3 95 107
4 | SPAN 4 MATERIAL 5 46 71
5 | SPAN 5 EXPERIMENT:current_exp 5 72 75
6 | SPAN 6 VALUE 5 76 87
7 | SPAN 7 VALUE 5 92 109
8 | SPAN 8 VALUE 5 113 119
9 | SPAN 9 EXPERIMENT:previous_work 19 12 20
10 | SPAN 10 MATERIAL 19 58 90
11 | SPAN 11 MATERIAL 19 134 151
12 | SPAN 12 VALUE 19 152 164
13 | SPAN 13 EXPERIMENT:previous_work 20 13 18
14 | SPAN 14 MATERIAL 20 41 75
15 | SPAN 15 VALUE 20 98 109
16 | SPAN 16 VALUE 20 113 119
17 | SPAN 17 MATERIAL 46 9 11
18 | SPAN 18 MATERIAL 46 12 14
19 | SPAN 19 DEVICE 46 15 25
20 | SPAN 20 EXPERIMENT:current_exp 46 46 52
21 | SPAN 21 MATERIAL 67 22 47
22 | SPAN 22 EXPERIMENT:current_exp 67 48 52
23 | SPAN 23 VALUE 67 53 96
24 | SPAN 24 VALUE 67 113 123
25 | SPAN 25 EXPERIMENT:current_exp 67 130 132
26 | SPAN 26 MATERIAL 67 147 177
27 | SPAN 27 MATERIAL 67 185 188
28 | SPAN 28 VALUE 67 192 198
29 | SPAN 29 EXPERIMENT:current_exp 75 9 17
30 | SPAN 30 MATERIAL 75 70 86
31 | SPAN 31 MATERIAL 75 101 126
32 | SPAN 32 VALUE 75 160 166
33 | SPAN 33 MATERIAL 79 50 66
34 | SPAN 34 MATERIAL 79 81 106
35 | SPAN 35 EXPERIMENT:current_exp 79 107 110
36 | SPAN 36 VALUE 79 111 121
37 | SPAN 37 VALUE 79 126 136
38 | SPAN 38 MATERIAL 82 175 177
39 | SPAN 39 MATERIAL 82 178 180
40 | SPAN 40 DEVICE 82 181 190
41 | SPAN 41 EXPERIMENT:current_exp 82 191 196
42 | SPAN 42 MATERIAL 82 197 222
43 | SPAN 43 VALUE 82 236 242
44 | SPAN 44 EXPERIMENT:current_exp 82 259 267
45 | SPAN 45 VALUE 82 302 313
46 | SPAN 46 EXPERIMENT:current_exp 82 331 333
47 | SPAN 47 VALUE 82 334 340
48 | SPAN 48 VALUE 82 344 350
49 | SPAN 49 MATERIAL 83 4 25
50 | SPAN 50 EXPERIMENT:current_exp 83 31 34
51 | SPAN 51 VALUE 83 35 47
52 | SPAN 52 EXPERIMENT:current_exp 84 27 29
53 | SPAN 53 DEVICE 84 46 55
54 | SPAN 54 MATERIAL 84 78 92
55 | SPAN 55 MATERIAL 84 101 111
56 | SPAN 56 VALUE 84 113 119
57 | SPAN 57 MATERIAL 84 125 152
58 | SPAN 58 VALUE 84 166 171
59 | SPAN 59 VALUE 84 173 179
60 | SPAN 60 MATERIAL 84 209 229
61 | SPAN 61 MATERIAL 84 237 248
62 | SPAN 62 VALUE 84 262 268
63 | SPAN 63 VALUE 84 270 276
64 | SPAN 64 MATERIAL 89 38 54
65 | SPAN 65 MATERIAL 89 69 94
66 | SPAN 66 EXPERIMENT:current_exp 89 95 99
67 | SPAN 67 VALUE 89 100 110
68 | SPAN 68 VALUE 89 115 125
69 | SPAN 69 VALUE 89 159 165
70 | SPAN 70 MATERIAL 90 46 71
71 | SPAN 71 EXPERIMENT:current_exp 90 72 76
72 | SPAN 72 VALUE 90 77 88
73 | SPAN 73 VALUE 90 93 110
74 | SPAN 74 VALUE 90 114 120
75 | EXPERIMENT 1 3
76 | electrolyte_material 1
77 | device 2
78 | EXPERIMENT 2 5
79 | electrolyte_material 4
80 | power_density 6
81 | conductivity 7
82 | working_temperature 8
83 | EXPERIMENT 3 9
84 | electrolyte_material 10
85 | electrolyte_material 11
86 | working_temperature 12
87 | EXPERIMENT 4 13
88 | electrolyte_material 14
89 | conductivity 15
90 | working_temperature 16
91 | EXPERIMENT 5 20
92 | fuel_used 17
93 | fuel_used 18
94 | device 19
95 | EXPERIMENT 6 22
96 | electrolyte_material 21
97 | conductivity 23
98 | working_temperature 24
99 | EXPERIMENT 7 25
100 | electrolyte_material 26
101 | fuel_used 27
102 | working_temperature 28
103 | EXPERIMENT 8 29
104 | electrolyte_material 30
105 | electrolyte_material 31
106 | working_temperature 32
107 | EXPERIMENT 9 35
108 | electrolyte_material 33
109 | electrolyte_material 34
110 | resistance 36
111 | resistance 37
112 | EXPERIMENT 10 41
113 | fuel_used 38
114 | fuel_used 39
115 | device 40
116 | electrolyte_material 42
117 | EXPERIMENT 11 44
118 | power_density 45
119 | EXPERIMENT 12 46
120 | voltage 47
121 | working_temperature 48
122 | EXPERIMENT 13 50
123 | electrolyte_material 49
124 | power_density 51
125 | EXPERIMENT 14 52
126 | device 53
127 | electrolyte_material 54
128 | electrolyte_material 55
129 | working_temperature 56
130 | electrolyte_material 57
131 | working_temperature 59
132 | electrolyte_material 60
133 | working_temperature 63
134 | EXPERIMENT 15 66
135 | electrolyte_material 64
136 | electrolyte_material 65
137 | resistance 67
138 | resistance 68
139 | working_temperature 69
140 | EXPERIMENT 16 71
141 | electrolyte_material 70
142 | power_density 72
143 | conductivity 73
144 | working_temperature 74
145 | LINK experiment_variation 25 22
146 | LINK thickness 42 43
147 | LINK same_experiment 41 44
148 | LINK experiment_variation 50 46
149 | LINK same_experiment 44 46
150 | LINK experiment_variation 52 50
151 | LINK thickness 57 58
152 | LINK thickness 61 62
153 | LINK same_experiment 66 71
154 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5457196.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 213
2 | 2 0 214 391
3 | 3 0 392 430
4 | 4 0 431 781
5 | 5 0 782 868
6 | 6 0 869 1107
7 | 7 1 1108 1391
8 | 8 1 1392 1517
9 | 9 1 1518 1818
10 | 10 0 1819 2013
11 | 11 0 2014 2220
12 | 12 0 2221 2400
13 | 13 1 2401 2531
14 | 14 0 2532 2759
15 | 15 1 2760 3018
16 | 16 0 3019 3501
17 | 17 0 3502 3612
18 | 18 0 3613 3787
19 | 19 1 3788 3926
20 | 20 0 3927 4050
21 | 21 0 4051 4160
22 | 22 1 4161 4390
23 | 23 0 4391 4490
24 | 24 0 4491 4654
25 | 25 1 4655 4917
26 | 26 0 4918 5018
27 | 27 0 5019 5233
28 | 28 0 5234 5444
29 | 29 0 5445 5921
30 | 30 0 5922 6199
31 | 31 0 6200 6417
32 | 32 0 6418 6498
33 | 33 0 6499 6628
34 | 34 0 6629 6834
35 | 35 0 6835 6847
36 | 36 0 6848 6922
37 | 37 0 6923 7010
38 | 38 0 7011 7143
39 | 39 0 7144 7225
40 | 40 0 7226 7386
41 | 41 0 7387 7531
42 | 42 0 7532 7742
43 | 43 0 7743 7806
44 | 44 0 7807 7983
45 | 45 0 7984 8021
46 | 46 0 8022 8101
47 | 47 0 8102 8154
48 | 48 0 8155 8243
49 | 49 0 8244 8331
50 | 50 1 8332 8470
51 | 51 1 8471 8585
52 | 52 0 8586 8739
53 | 53 0 8740 8844
54 | 54 0 8845 8919
55 | 55 0 8920 8983
56 | 56 0 8984 9095
57 | 57 0 9096 9271
58 | 58 0 9272 9396
59 | 59 0 9397 9483
60 | 60 0 9484 9574
61 | 61 0 9575 9700
62 | 62 0 9701 9851
63 | 63 1 9852 10062
64 | 64 0 10063 10192
65 | 65 0 10193 10290
66 | 66 0 10291 10363
67 | 67 0 10364 10443
68 | 68 0 10444 10624
69 | 69 0 10625 10731
70 | 70 0 10732 10883
71 | 71 0 10884 10982
72 | 72 0 10983 11196
73 | 73 0 11197 11257
74 | 74 0 11258 11393
75 | 75 0 11394 11453
76 | 76 0 11454 11516
77 | 77 0 11517 11663
78 | 78 0 11664 11777
79 | 79 0 11778 11898
80 | 80 0 11899 11956
81 | 81 0 11957 12038
82 | 82 0 12039 12258
83 | 83 0 12259 12390
84 | 84 0 12391 12551
85 | 85 0 12552 12679
86 | 86 0 12680 12901
87 | 87 0 12902 13026
88 | 88 0 13027 13191
89 | 89 0 13192 13322
90 | 90 0 13323 13462
91 | 91 0 13463 13657
92 | 92 0 13658 13821
93 | 93 0 13822 13926
94 | 94 0 13927 14064
95 | 95 0 14065 14130
96 | 96 0 14131 14239
97 | 97 0 14240 14489
98 | 98 0 14490 14602
99 | 99 0 14603 14680
100 | 100 0 14681 14739
101 | 101 0 14740 14857
102 | 102 0 14858 14974
103 | 103 0 14975 15125
104 | 104 0 15126 15244
105 | 105 0 15245 15350
106 | 106 0 15351 15544
107 | 107 0 15545 15629
108 | 108 0 15630 15702
109 | 109 0 15703 15814
110 | 110 0 15815 15876
111 | 111 0 15877 15934
112 | 112 0 15935 16072
113 | 113 0 16073 16275
114 | 114 0 16276 16429
115 | 115 0 16430 16538
116 | 116 0 16539 16972
117 | 117 0 16973 17137
118 | 118 0 17138 17200
119 | 119 0 17201 17379
120 | 120 0 17380 17499
121 | 121 0 17500 17594
122 | 122 0 17595 17665
123 | 123 0 17666 17775
124 | 124 1 17776 17914
125 | 125 1 17915 18027
126 | 126 0 18028 18137
127 | 127 1 18138 18304
128 | 128 0 18305 18371
129 | 129 0 18372 18418
130 | 130 0 18419 18443
131 | 131 0 18444 18523
132 | 132 1 18524 18643
133 | 133 1 18644 18739
134 | 134 0 18740 18848
135 | 135 0 18849 19016
136 | 136 0 19017 19126
137 | 137 0 19127 19327
138 | 138 0 19328 19554
139 | 139 0 19555 19852
140 | 140 0 19853 20012
141 | 141 0 20013 20383
142 | 142 0 20384 20522
143 | 143 0 20523 20589
144 | 144 0 20590 20858
145 | 145 0 20859 20941
146 | 146 0 20942 21128
147 | 147 0 21129 21247
148 | 148 0 21248 21454
149 | 149 0 21455 21588
150 | 150 0 21589 21739
151 | 151 0 21740 21874
152 | 152 0 21875 22617
153 | 153 0 22618 22693
154 | 154 0 22694 22964
155 | 155 0 22965 23167
156 | 156 0 23168 23494
157 | 157 0 23495 23582
158 | 158 0 23583 23777
159 | 159 0 23778 23936
160 | 160 0 23937 24093
161 | 161 0 24094 24192
162 | 162 0 24193 24235
163 | 163 0 24236 24362
164 | 164 0 24363 24490
165 | 165 0 24491 24825
166 | 166 0 24826 24895
167 | 167 0 24896 24957
168 | 168 0 24958 25212
169 | 169 0 25213 25292
170 | 170 0 25293 25381
171 | 171 0 25382 25450
172 | 172 0 25451 25551
173 | 173 0 25552 25623
174 | 174 0 25624 25961
175 | 175 0 25962 26058
176 | 176 0 26059 26097
177 | 177 0 26098 26245
178 | 178 0 26246 26337
179 | 179 0 26338 26428
180 | 180 0 26429 26666
181 | 181 0 26667 26748
182 | 182 0 26749 27085
183 | 183 0 27086 27144
184 | 184 0 27145 27281
185 | 185 0 27282 27356
186 | 186 0 27357 27438
187 | 187 0 27439 27528
188 | 188 0 27529 27670
189 | 189 0 27671 27841
190 | 190 0 27842 28078
191 | 191 0 28079 28247
192 | 192 0 28248 28375
193 | 193 0 28376 28565
194 | 194 0 28566 28663
195 | 195 0 28664 28833
196 | 196 0 28834 28997
197 | 197 0 28998 29215
198 | 198 0 29216 29335
199 | 199 0 29336 29509
200 | 200 0 29510 29653
201 | 201 0 29654 29770
202 | 202 0 29771 29952
203 | 203 0 29953 30235
204 | 204 0 30236 30458
205 | 205 0 30459 30719
206 | 206 0 30720 30925
207 | 207 0 30926 31086
208 |
--------------------------------------------------------------------------------
/code/source/utils.py:
--------------------------------------------------------------------------------
1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020).
2 | Copyright (c) 2020 Robert Bosch GmbH
3 | @author: Heike Adel
4 | @author: Annemarie Friedrich
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU Affero General Public License as published
8 | by the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU Affero General Public License for more details.
15 | You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
17 | """
18 |
19 |
20 | import numpy as np
21 |
22 |
def _f_measure(precision, recall):
    """Return the harmonic mean of precision and recall, or 0 if both are 0."""
    if precision + recall == 0:
        return 0
    return 2 * precision * recall / (precision + recall)


def compute_eval_scores(conf_matrix, labels):
    """
    Compute accuracy as well as class-wise, macro- and micro-averaged precision, recall and F1.
    All scores are percentages (0-100).

    :param conf_matrix: num_classes x num_classes confusion matrix, first dimension is the gold standard label,
                        second dimension is the predicted label
    :param labels: list of labels, indices correspond to the indices in the confusion matrix
    :return: accuracy, macro p, r, f, micro p, r, f, dictionaries with p, r, f by class
             (the micro-averages skip index 0, which is treated as the negative class)
    """
    print("labels", labels)
    num_classes = len(labels)
    conf = np.array(conf_matrix)

    p = {}  # precision by class
    r = {}  # recall by class
    f1 = {}  # F1 by class

    micro_tp = 0
    micro_total_gold = 0
    micro_total_pred = 0

    # accumulated over all classes (including the negative class) for accuracy
    all_correct = 0
    all_total = 0

    for i, cat in enumerate(labels):
        correct = conf[i, i]
        total_gold = conf[i].sum()
        total_pred = conf[:, i].sum()
        all_correct += correct
        all_total += total_gold
        if i > 0:  # do not include negative class
            micro_tp += correct
            micro_total_gold += total_gold
            micro_total_pred += total_pred
        # Convention: if this class was never predicted, P = 1, i.e. 100 on the
        # percentage scale used by every other score in this function.
        if total_pred == 0:
            p[cat] = 100
        else:
            p[cat] = correct / total_pred * 100
        if total_gold == 0:
            r[cat] = 0
        else:
            r[cat] = correct / total_gold * 100
        # Convention: if P=0 and R=0 then F1 = 0
        f1[cat] = _f_measure(p[cat], r[cat])

    # micro-averages
    if micro_total_pred == 0:
        micro_p = 0
    else:
        micro_p = micro_tp / micro_total_pred * 100
    if micro_total_gold == 0:
        micro_r = 0
    else:
        micro_r = micro_tp / micro_total_gold * 100
    micro_f1 = _f_measure(micro_p, micro_r)

    # macro-averages (each class weighted equally); 0 if there are no classes at all
    if num_classes == 0:
        macro_p = 0
        macro_r = 0
    else:
        macro_p = sum(p.values()) / num_classes
        macro_r = sum(r.values()) / num_classes
    # macro-avg. F1 is the harmonic mean of macro-p and macro-r (0 if both are 0,
    # which avoids a division by zero / NaN when every prediction is wrong)
    macro_f1 = _f_measure(macro_p, macro_r)

    # accuracy (0 for an empty confusion matrix instead of dividing by zero)
    if all_total == 0:
        accuracy = 0
    else:
        accuracy = all_correct / all_total * 100

    return accuracy, macro_p, macro_r, macro_f1, micro_p, micro_r, micro_f1, p, r, f1
95 |
96 |
def print_results_classification(conf_matrix, dataset_name, num_labels):
    """
    Print the micro F1, the class-wise and macro-averaged scores and the confusion matrix itself.

    :param conf_matrix: confusion matrix with predictions vs. true labels
    :param dataset_name: name of dataset (for printing only)
    :param num_labels: number of labels; the label indices 0..num_labels-1 are used as label names
    :return: dictionaries with precision, recall and F1 by class (keyed by label index)
    """
    label_ids = list(range(num_labels))
    (_accuracy, macro_p, macro_r, macro_f,
     _micro_p, _micro_r, micro_f1,
     classwise_p, classwise_r, classwise_f) = compute_eval_scores(conf_matrix, label_ids)

    print(dataset_name + ' F1: {:.1f}'.format(micro_f1))

    print("class-wise results:")
    for label_id in label_ids:
        cells = [
            "{0: <11}".format(label_id),
            " {:7.1f}".format(classwise_p[label_id]),
            " {:7.1f}".format(classwise_r[label_id]),
            " {:7.1f}".format(classwise_f[label_id]),
        ]
        print("".join(cells))

    macro_cells = ["{:7.1f}".format(score) for score in (macro_p, macro_r, macro_f)]
    print("macro-avg:", *macro_cells)

    # dump the raw confusion matrix, one gold-label row per line
    for matrix_row in conf_matrix:
        print("\t", matrix_row)

    return classwise_p, classwise_r, classwise_f
114 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC5944822.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 252
2 | 2 0 253 501
3 | 3 0 502 703
4 | 4 0 704 1043
5 | 5 0 1044 1354
6 | 6 0 1355 1519
7 | 7 0 1520 1687
8 | 8 0 1688 1820
9 | 9 0 1821 1910
10 | 10 0 1911 2591
11 | 11 0 2592 2810
12 | 12 0 2811 2943
13 | 13 0 2944 3321
14 | 14 0 3322 4200
15 | 15 0 4201 4367
16 | 16 0 4368 4507
17 | 17 0 4508 4640
18 | 18 0 4641 4846
19 | 19 0 4847 5042
20 | 20 0 5043 5207
21 | 21 0 5208 5325
22 | 22 0 5326 5587
23 | 23 0 5588 6121
24 | 24 0 6122 6335
25 | 25 0 6336 6998
26 | 26 0 6999 7191
27 | 27 0 7192 7364
28 | 28 0 7365 7561
29 | 29 0 7562 7934
30 | 30 0 7935 8640
31 | 31 0 8641 8814
32 | 32 0 8815 9048
33 | 33 0 9049 9279
34 | 34 0 9280 9751
35 | 35 0 9752 10600
36 | 36 0 10601 11252
37 | 37 0 11253 11620
38 | 38 0 11621 11796
39 | 39 0 11797 11862
40 | 40 0 11863 12048
41 | 41 0 12049 12089
42 | 42 0 12090 12134
43 | 43 0 12135 12455
44 | 44 0 12456 12562
45 | 45 0 12563 14283
46 | 46 0 14284 14501
47 | 47 0 14502 14866
48 | 48 0 14867 15242
49 | 49 0 15243 15429
50 | 50 0 15430 15608
51 | 51 0 15609 15669
52 | 52 0 15670 16401
53 | 53 0 16402 16464
54 | 54 0 16465 16507
55 | 55 0 16508 16642
56 | 56 0 16643 17016
57 | 57 0 17017 17159
58 | 58 0 17160 17329
59 | 59 0 17330 17433
60 | 60 0 17434 17585
61 | 61 0 17586 17711
62 | 62 0 17712 17814
63 | 63 0 17815 17854
64 | 64 0 17855 17898
65 | 65 0 17899 18135
66 | 66 0 18136 18535
67 | 67 0 18536 18626
68 | 68 0 18627 18802
69 | 69 0 18803 19041
70 | 70 0 19042 19426
71 | 71 0 19427 19949
72 | 72 0 19950 20417
73 | 73 0 20418 21396
74 | 74 0 21397 21611
75 | 75 0 21612 21936
76 | 76 0 21937 22224
77 | 77 0 22225 22798
78 | 78 0 22799 22906
79 | 79 0 22907 23187
80 | 80 0 23188 23329
81 | 81 0 23330 23737
82 | 82 0 23738 24794
83 | 83 0 24795 25577
84 | 84 0 25578 26130
85 | 85 0 26131 26379
86 | 86 0 26380 26910
87 | 87 0 26911 26978
88 | 88 0 26979 27033
89 | 89 0 27034 27073
90 | 90 0 27074 27118
91 | 91 0 27119 27446
92 | 92 0 27447 27588
93 | 93 0 27589 27759
94 | 94 0 27760 28176
95 | 95 0 28177 28340
96 | 96 0 28341 28493
97 | 97 0 28494 28664
98 | 98 0 28665 28851
99 | 99 0 28852 29094
100 | 100 0 29095 29283
101 | 101 0 29284 29435
102 | 102 0 29436 29605
103 | 103 0 29606 29848
104 | 104 0 29849 29924
105 | 105 0 29925 30337
106 | 106 0 30338 30530
107 | 107 0 30531 30829
108 | 108 0 30830 30922
109 | 109 0 30923 31043
110 | 110 0 31044 31259
111 | 111 0 31260 31452
112 | 112 0 31453 31632
113 | 113 0 31633 31819
114 | 114 0 31820 32041
115 | 115 0 32042 32197
116 | 116 0 32198 32428
117 | 117 0 32429 32607
118 | 118 0 32608 33140
119 | 119 0 33141 33515
120 | 120 0 33516 34032
121 | 121 0 34033 34210
122 | 122 0 34211 34387
123 | 123 0 34388 34526
124 | 124 0 34527 34920
125 | 125 0 34921 35257
126 | 126 0 35258 35598
127 | 127 0 35599 36870
128 | 128 0 36871 37086
129 | 129 0 37087 37532
130 | 130 0 37533 38394
131 | 131 0 38395 39134
132 | 132 0 39135 39247
133 | 133 0 39248 39611
134 | 134 0 39612 39685
135 | 135 0 39686 40016
136 | 136 1 40017 40395
137 | 137 0 40396 40565
138 | 138 1 40566 40945
139 | 139 0 40946 41052
140 | 140 0 41053 41397
141 | 141 0 41398 41552
142 | 142 1 41553 41886
143 | 143 0 41887 41993
144 | 144 1 41994 42617
145 | 145 0 42618 43028
146 | 146 0 43029 43277
147 | 147 0 43278 43461
148 | 148 0 43462 43554
149 | 149 1 43555 44131
150 | 150 1 44132 44754
151 | 151 0 44755 44796
152 | 152 0 44797 44841
153 | 153 1 44842 45564
154 | 154 0 45565 45750
155 | 155 0 45751 46207
156 | 156 0 46208 46498
157 | 157 0 46499 46607
158 | 158 0 46608 46793
159 | 159 0 46794 47250
160 | 160 0 47251 49943
161 | 161 0 49944 50153
162 | 162 0 50154 50284
163 | 163 0 50285 50927
164 | 164 0 50928 51274
165 | 165 0 51275 51733
166 | 166 0 51734 52300
167 | 167 0 52301 52389
168 | 168 0 52390 52663
169 | 169 0 52664 52844
170 | 170 0 52845 53015
171 | 171 0 53016 53070
172 | 172 0 53071 53904
173 | 173 0 53905 54136
174 | 174 0 54137 54678
175 | 175 0 54679 55223
176 | 176 0 55224 55540
177 | 177 0 55541 56761
178 | 178 0 56762 56893
179 | 179 0 56894 57028
180 | 180 0 57029 57212
181 | 181 0 57213 57363
182 | 182 0 57364 58086
183 | 183 0 58087 58229
184 | 184 0 58230 58431
185 | 185 0 58432 58796
186 | 186 0 58797 59130
187 | 187 0 59131 59306
188 | 188 0 59307 59742
189 | 189 0 59743 59937
190 | 190 0 59938 60250
191 | 191 0 60251 60410
192 | 192 0 60411 60738
193 | 193 0 60739 60952
194 | 194 0 60953 61016
195 | 195 0 61017 61195
196 | 196 0 61196 61381
197 | 197 0 61382 61594
198 | 198 0 61595 62089
199 | 199 0 62090 62509
200 | 200 0 62510 62651
201 | 201 0 62652 62757
202 | 202 0 62758 62993
203 | 203 0 62994 63490
204 | 204 0 63491 63692
205 | 205 0 63693 64103
206 | 206 0 64104 64109
207 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC6523084.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 653
2 | 2 0 654 816
3 | 3 0 817 1004
4 | 4 0 1005 1213
5 | 5 0 1214 1331
6 | 6 1 1332 1576
7 | 7 0 1577 1726
8 | 8 0 1727 2033
9 | 9 0 2034 2248
10 | 10 0 2249 2353
11 | 11 0 2354 2470
12 | 12 0 2471 2567
13 | 13 0 2568 2723
14 | 14 0 2724 2803
15 | 15 0 2804 2921
16 | 16 0 2922 2997
17 | 17 0 2998 3208
18 | 18 0 3209 3380
19 | 19 0 3381 3660
20 | 20 0 3661 3843
21 | 21 0 3844 4041
22 | 22 0 4042 4114
23 | 23 0 4115 4462
24 | 24 0 4463 4684
25 | 25 0 4685 4823
26 | 26 0 4824 5073
27 | 27 0 5074 5262
28 | 28 0 5263 5491
29 | 29 0 5492 5741
30 | 30 0 5742 6032
31 | 31 0 6033 6145
32 | 32 0 6146 6308
33 | 33 0 6309 6551
34 | 34 0 6552 6711
35 | 35 0 6712 6898
36 | 36 0 6899 7097
37 | 37 0 7098 7213
38 | 38 0 7214 7318
39 | 39 0 7319 7496
40 | 40 0 7497 7656
41 | 41 0 7657 8027
42 | 42 0 8028 8091
43 | 43 0 8092 8186
44 | 44 0 8187 8493
45 | 45 1 8494 8700
46 | 46 1 8701 8958
47 | 47 1 8959 9079
48 | 48 0 9080 9269
49 | 49 0 9270 9401
50 | 50 0 9402 9518
51 | 51 0 9519 9715
52 | 52 0 9716 9860
53 | 53 0 9861 9960
54 | 54 0 9961 10054
55 | 55 0 10055 10254
56 | 56 0 10255 10465
57 | 57 0 10466 10695
58 | 58 0 10696 11026
59 | 59 0 11027 11178
60 | 60 0 11179 11325
61 | 61 0 11326 11517
62 | 62 0 11518 11751
63 | 63 0 11752 11886
64 | 64 0 11887 12014
65 | 65 0 12015 12180
66 | 66 0 12181 12401
67 | 67 0 12402 12508
68 | 68 0 12509 12648
69 | 69 0 12649 12736
70 | 70 0 12737 13032
71 | 71 0 13033 13113
72 | 72 0 13114 13262
73 | 73 0 13263 13807
74 | 74 0 13808 14037
75 | 75 0 14038 14323
76 | 76 0 14324 14575
77 | 77 0 14576 14722
78 | 78 0 14723 14858
79 | 79 0 14859 14986
80 | 80 0 14987 15117
81 | 81 0 15118 15243
82 | 82 0 15244 15414
83 | 83 0 15415 15471
84 | 84 0 15472 15606
85 | 85 0 15607 15762
86 | 86 0 15763 15944
87 | 87 0 15945 16124
88 | 88 0 16125 16303
89 | 89 0 16304 16407
90 | 90 0 16408 16536
91 | 91 0 16537 16604
92 | 92 0 16605 16752
93 | 93 0 16753 16872
94 | 94 0 16873 17008
95 | 95 0 17009 17089
96 | 96 0 17090 17191
97 | 97 0 17192 17287
98 | 98 0 17288 17425
99 | 99 0 17426 17630
100 | 100 0 17631 17724
101 | 101 0 17725 17882
102 | 102 0 17883 18021
103 | 103 0 18022 18121
104 | 104 0 18122 18343
105 | 105 0 18344 18467
106 | 106 0 18468 18589
107 | 107 0 18590 18685
108 | 108 0 18686 18803
109 | 109 0 18804 18935
110 | 110 0 18936 19100
111 | 111 0 19101 19281
112 | 112 0 19282 19574
113 | 113 0 19575 19722
114 | 114 0 19723 19835
115 | 115 0 19836 20099
116 | 116 0 20100 20303
117 | 117 0 20304 20386
118 | 118 0 20387 20539
119 | 119 0 20540 20607
120 | 120 1 20608 20711
121 | 121 0 20712 20815
122 | 122 0 20816 20922
123 | 123 0 20923 21139
124 | 124 0 21140 21255
125 | 125 0 21256 21318
126 | 126 0 21319 21438
127 | 127 0 21439 21512
128 | 128 0 21513 21673
129 | 129 0 21674 21790
130 | 130 0 21791 22029
131 | 131 0 22030 22156
132 | 132 0 22157 22338
133 | 133 0 22339 22399
134 | 134 0 22400 22537
135 | 135 0 22538 22683
136 | 136 0 22684 22712
137 | 137 0 22713 22901
138 | 138 0 22902 22976
139 | 139 0 22977 23173
140 | 140 0 23174 23250
141 | 141 0 23251 23372
142 | 142 0 23373 23522
143 | 143 0 23523 23674
144 | 144 0 23675 23769
145 | 145 0 23770 23873
146 | 146 0 23874 23942
147 | 147 0 23943 24195
148 | 148 0 24196 24305
149 | 149 0 24306 24358
150 | 150 0 24359 24495
151 | 151 0 24496 24672
152 | 152 0 24673 24868
153 | 153 0 24869 25063
154 | 154 0 25064 25355
155 | 155 0 25356 25587
156 | 156 0 25588 25700
157 | 157 0 25701 25869
158 | 158 0 25870 26064
159 | 159 0 26065 26236
160 | 160 0 26237 26281
161 | 161 0 26282 26381
162 | 162 0 26382 26689
163 | 163 1 26690 26759
164 | 164 0 26760 26993
165 | 165 0 26994 27044
166 | 166 0 27045 27246
167 | 167 0 27247 27395
168 | 168 0 27396 27541
169 | 169 0 27542 27683
170 | 170 0 27684 27899
171 | 171 0 27900 28110
172 | 172 0 28111 28404
173 | 173 0 28405 28504
174 | 174 0 28505 28684
175 | 175 0 28685 28882
176 | 176 0 28883 28966
177 | 177 0 28967 29050
178 | 178 0 29051 29176
179 | 179 0 29177 29296
180 | 180 0 29297 29347
181 | 181 0 29348 29451
182 | 182 0 29452 29621
183 | 183 1 29622 29727
184 | 184 1 29728 29812
185 | 185 1 29813 30056
186 | 186 1 30057 30245
187 | 187 0 30246 30402
188 | 188 0 30403 30530
189 | 189 0 30531 30602
190 | 190 0 30603 30829
191 | 191 1 30830 31003
192 | 192 0 31004 31147
193 | 193 0 31148 31348
194 | 194 0 31349 31585
195 | 195 0 31586 31739
196 | 196 0 31740 31871
197 | 197 0 31872 32107
198 | 198 0 32108 32259
199 | 199 0 32260 32423
200 | 200 0 32424 32686
201 | 201 0 32687 32862
202 | 202 0 32863 32964
203 | 203 0 32965 33131
204 | 204 0 33132 33339
205 | 205 0 33340 33464
206 | 206 0 33465 33594
207 | 207 0 33595 33801
208 | 208 0 33802 34026
209 | 209 0 34027 34182
210 | 210 0 34183 34381
211 | 211 0 34382 34545
212 | 212 0 34546 34828
213 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC4222441.csv:
--------------------------------------------------------------------------------
1 | 1 1 0 739
2 | 2 1 740 847
3 | 3 0 848 1008
4 | 4 0 1009 1382
5 | 5 0 1383 1777
6 | 6 0 1778 1856
7 | 7 0 1857 2079
8 | 8 0 2080 2169
9 | 9 1 2170 2248
10 | 10 0 2249 2297
11 | 11 0 2298 2490
12 | 12 0 2491 2664
13 | 13 0 2665 2884
14 | 14 0 2885 3045
15 | 15 0 3046 3330
16 | 16 0 3331 3592
17 | 17 0 3593 3625
18 | 18 0 3626 3802
19 | 19 0 3803 3963
20 | 20 1 3964 4198
21 | 21 1 4199 4448
22 | 22 0 4449 4656
23 | 23 0 4657 4725
24 | 24 0 4726 4903
25 | 25 0 4904 5045
26 | 26 0 5046 5135
27 | 27 0 5136 5305
28 | 28 0 5306 5450
29 | 29 0 5451 5583
30 | 30 0 5584 5830
31 | 31 0 5831 5949
32 | 32 0 5950 6098
33 | 33 0 6099 6280
34 | 34 0 6281 6717
35 | 35 0 6718 6839
36 | 36 0 6840 6998
37 | 37 1 6999 7182
38 | 38 0 7183 7437
39 | 39 1 7438 7585
40 | 40 0 7586 7667
41 | 41 0 7668 7771
42 | 42 0 7772 7829
43 | 43 0 7830 7977
44 | 44 0 7978 8055
45 | 45 0 8056 8508
46 | 46 0 8509 8607
47 | 47 0 8608 8703
48 | 48 0 8704 8738
49 | 49 0 8739 8868
50 | 50 0 8869 9020
51 | 51 0 9021 9135
52 | 52 1 9136 9361
53 | 53 0 9362 9539
54 | 54 0 9540 9574
55 | 55 0 9575 9799
56 | 56 0 9800 10027
57 | 57 0 10028 10160
58 | 58 0 10161 10275
59 | 59 0 10276 10315
60 | 60 0 10316 10589
61 | 61 0 10590 10669
62 | 62 0 10670 10887
63 | 63 0 10888 11009
64 | 64 0 11010 11104
65 | 65 0 11105 11206
66 | 66 0 11207 11326
67 | 67 0 11327 11438
68 | 68 0 11439 11527
69 | 69 0 11528 11661
70 | 70 0 11662 11781
71 | 71 0 11782 11841
72 | 72 0 11842 12023
73 | 73 0 12024 12678
74 | 74 0 12679 12770
75 | 75 0 12771 12843
76 | 76 0 12844 12903
77 | 77 0 12904 13059
78 | 78 0 13060 13119
79 | 79 0 13120 13262
80 | 80 0 13263 13402
81 | 81 0 13403 13526
82 | 82 0 13527 13613
83 | 83 0 13614 13675
84 | 84 0 13676 13785
85 | 85 0 13786 13976
86 | 86 0 13977 14097
87 | 87 0 14098 14259
88 | 88 0 14260 14537
89 | 89 0 14538 14629
90 | 90 0 14630 14834
91 | 91 0 14835 14938
92 | 92 0 14939 15093
93 | 93 0 15094 15169
94 | 94 0 15170 15234
95 | 95 0 15235 15352
96 | 96 0 15353 15464
97 | 97 0 15465 15615
98 | 98 0 15616 15800
99 | 99 0 15801 15903
100 | 100 0 15904 16179
101 | 101 0 16180 16332
102 | 102 0 16333 16551
103 | 103 0 16552 16640
104 | 104 0 16641 16775
105 | 105 0 16776 16889
106 | 106 0 16890 16988
107 | 107 0 16989 17130
108 | 108 0 17131 17190
109 | 109 0 17191 17502
110 | 110 0 17503 17579
111 | 111 0 17580 17627
112 | 112 0 17628 17834
113 | 113 0 17835 17944
114 | 114 0 17945 18059
115 | 115 0 18060 18257
116 | 116 0 18258 18409
117 | 117 0 18410 18486
118 | 118 0 18487 18672
119 | 119 0 18673 18759
120 | 120 0 18760 19213
121 | 121 0 19214 19503
122 | 122 0 19504 19659
123 | 123 0 19660 19888
124 | 124 0 19889 20161
125 | 125 0 20162 20249
126 | 126 0 20250 20578
127 | 127 0 20579 20818
128 | 128 0 20819 20987
129 | 129 0 20988 21142
130 | 130 0 21143 21282
131 | 131 0 21283 21385
132 | 132 0 21386 21457
133 | 133 0 21458 21501
134 | 134 0 21502 21628
135 | 135 0 21629 21772
136 | 136 0 21773 21873
137 | 137 0 21874 22079
138 | 138 0 22080 22333
139 | 139 0 22334 22516
140 | 140 0 22517 22761
141 | 141 0 22762 22938
142 | 142 0 22939 23125
143 | 143 0 23126 23360
144 | 144 0 23361 23538
145 | 145 0 23539 23836
146 | 146 0 23837 24094
147 | 147 0 24095 24246
148 | 148 0 24247 24549
149 | 149 0 24550 24781
150 | 150 0 24782 25011
151 | 151 0 25012 25192
152 | 152 0 25193 25324
153 | 153 0 25325 25521
154 | 154 0 25522 25645
155 | 155 0 25646 25742
156 | 156 0 25743 25940
157 | 157 0 25941 26142
158 | 158 0 26143 26322
159 | 159 0 26323 26466
160 | 160 0 26467 26548
161 | 161 0 26549 26618
162 | 162 0 26619 26816
163 | 163 0 26817 26926
164 | 164 0 26927 27070
165 | 165 1 27071 27209
166 | 166 0 27210 27450
167 | 167 0 27451 27511
168 | 168 0 27512 27668
169 | 169 0 27669 27837
170 | 170 0 27838 28081
171 | 171 0 28082 28219
172 | 172 0 28220 28468
173 | 173 0 28469 28716
174 | 174 0 28717 28858
175 | 175 0 28859 28973
176 | 176 0 28974 29024
177 | 177 0 29025 29129
178 | 178 0 29130 29252
179 | 179 0 29253 29467
180 | 180 0 29468 29707
181 | 181 0 29708 29846
182 | 182 0 29847 29886
183 | 183 0 29887 29998
184 | 184 0 29999 30204
185 | 185 0 30205 30389
186 | 186 0 30390 30540
187 | 187 0 30541 30669
188 | 188 0 30670 30819
189 | 189 0 30820 30993
190 | 190 0 30994 31151
191 | 191 0 31152 31302
192 | 192 0 31303 31624
193 | 193 0 31625 31824
194 | 194 0 31825 31924
195 | 195 0 31925 32189
196 | 196 0 32190 32403
197 | 197 0 32404 32479
198 | 198 0 32480 32519
199 | 199 0 32520 32598
200 | 200 0 32599 32765
201 | 201 0 32766 32957
202 | 202 0 32958 33142
203 | 203 0 33143 33249
204 | 204 0 33250 33381
205 | 205 0 33382 33551
206 | 206 0 33552 33682
207 | 207 0 33683 33821
208 | 208 0 33822 33914
209 | 209 0 33915 33996
210 | 210 0 33997 34137
211 | 211 0 34138 34352
212 | 212 0 34353 34490
213 | 213 0 34491 34609
214 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC6445146.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 EXPERIMENT:current_exp 8 31 38
2 | SPAN 2 VALUE 8 74 85
3 | SPAN 3 VALUE 8 89 95
4 | SPAN 4 VALUE 9 257 267
5 | SPAN 5 EXPERIMENT:current_exp 11 17 23
6 | SPAN 6 DEVICE 11 43 55
7 | SPAN 7 DEVICE 11 77 98
8 | SPAN 8 DEVICE 12 25 29
9 | SPAN 9 EXPERIMENT:general_info 12 46 53
10 | SPAN 10 MATERIAL 12 91 111
11 | SPAN 11 DEVICE 13 15 42
12 | SPAN 12 DEVICE 13 44 49
13 | SPAN 13 DEVICE 13 66 89
14 | SPAN 14 EXPERIMENT:general_info 13 126 139
15 | SPAN 15 VALUE 13 225 235
16 | SPAN 16 DEVICE 13 279 288
17 | SPAN 17 DEVICE 14 125 129
18 | SPAN 18 DEVICE 16 67 72
19 | SPAN 19 EXPERIMENT:general_info 16 115 117
20 | SPAN 20 DEVICE 16 148 161
21 | SPAN 21 VALUE 16 167 177
22 | SPAN 22 VALUE 16 181 189
23 | SPAN 23 EXPERIMENT:current_exp 28 11 17
24 | SPAN 24 DEVICE 28 58 62
25 | SPAN 25 DEVICE 28 171 175
26 | SPAN 26 EXPERIMENT:current_exp 35 49 53
27 | SPAN 27 MATERIAL 35 77 89
28 | SPAN 28 MATERIAL 35 91 94
29 | SPAN 29 MATERIAL 35 96 122
30 | SPAN 30 MATERIAL 35 124 127
31 | SPAN 31 EXPERIMENT:current_exp 100 68 71
32 | SPAN 32 VALUE 100 72 83
33 | SPAN 33 VALUE 100 87 97
34 | SPAN 34 EXPERIMENT:current_exp 100 166 176
35 | SPAN 35 VALUE 100 181 185
36 | SPAN 36 MATERIAL 100 192 195
37 | SPAN 37 DEVICE 101 61 65
38 | SPAN 38 EXPERIMENT:current_exp 101 66 70
39 | SPAN 39 VALUE 101 71 75
40 | SPAN 40 VALUE 101 77 81
41 | SPAN 41 VALUE 101 83 87
42 | SPAN 42 VALUE 101 92 103
43 | SPAN 43 VALUE 101 107 110
44 | SPAN 44 VALUE 101 112 115
45 | SPAN 45 VALUE 101 117 120
46 | SPAN 46 VALUE 101 125 131
47 | SPAN 47 DEVICE 101 197 202
48 | SPAN 48 VALUE 101 223 229
49 | SPAN 49 EXPERIMENT:current_exp 101 230 234
50 | SPAN 50 VALUE 101 235 239
51 | SPAN 51 VALUE 101 241 245
52 | SPAN 52 VALUE 101 250 261
53 | SPAN 53 VALUE 106 38 49
54 | SPAN 54 EXPERIMENT:current_exp 106 50 58
55 | SPAN 55 DEVICE 106 137 142
56 | SPAN 56 MATERIAL 106 148 151
57 | SPAN 57 EXPERIMENT:current_exp 107 48 60
58 | SPAN 58 DEVICE 107 104 108
59 | SPAN 59 VALUE 107 112 118
60 | SPAN 60 VALUE 107 123 128
61 | SPAN 61 VALUE 108 87 92
62 | SPAN 62 DEVICE 112 130 135
63 | SPAN 63 EXPERIMENT:current_exp 112 170 173
64 | SPAN 64 VALUE 112 174 177
65 | SPAN 65 VALUE 112 179 183
66 | SPAN 66 VALUE 112 188 198
67 | SPAN 67 DEVICE 120 103 107
68 | SPAN 68 MATERIAL 120 113 116
69 | SPAN 69 EXPERIMENT:current_exp 120 129 135
70 | SPAN 70 VALUE 120 277 286
71 | SPAN 71 VALUE 140 24 30
72 | SPAN 72 MATERIAL 140 36 38
73 | SPAN 73 EXPERIMENT:current_exp 140 51 55
74 | SPAN 74 VALUE 140 56 60
75 | SPAN 75 VALUE 140 62 66
76 | SPAN 76 VALUE 140 71 82
77 | SPAN 77 DEVICE 140 221 225
78 | SPAN 78 VALUE 140 227 238
79 | SPAN 79 EXPERIMENT:current_exp 141 29 38
80 | SPAN 80 VALUE 141 48 53
81 | SPAN 81 DEVICE 141 54 63
82 | SPAN 82 DEVICE 143 21 26
83 | SPAN 83 EXPERIMENT:current_exp 143 32 41
84 | SPAN 84 VALUE 143 113 122
85 | EXPERIMENT 1 1
86 | power_density 2
87 | working_temperature 3
88 | EXPERIMENT 2 5
89 | device 6
90 | device 7
91 | EXPERIMENT 3 9
92 | device 8
93 | electrolyte_material 10
94 | EXPERIMENT 4 14
95 | device 11
96 | working_temperature 15
97 | device 16
98 | EXPERIMENT 5 19
99 | device 18
100 | device 20
101 | power_density 21
102 | power_density 22
103 | EXPERIMENT 6 23
104 | device 24
105 | device 25
106 | EXPERIMENT 7 26
107 | anode_material 27
108 | anode_material 29
109 | EXPERIMENT 8 31
110 | open_circuit_voltage 32
111 | working_temperature 33
112 | EXPERIMENT 9 34
113 | electrolyte_material 36
114 | EXPERIMENT 10 38
115 | device 37
116 | power_density 39
117 | power_density 40
118 | power_density 41
119 | power_density 42
120 | working_temperature 43
121 | working_temperature 44
122 | working_temperature 45
123 | working_temperature 46
124 | EXPERIMENT 11 49
125 | device 47
126 | working_temperature 48
127 | power_density 50
128 | power_density 51
129 | power_density 52
130 | EXPERIMENT 12 54
131 | power_density 53
132 | device 55
133 | electrolyte_material 56
134 | EXPERIMENT 13 57
135 | device 58
136 | working_temperature 59
137 | voltage 60
138 | time_of_operation 61
139 | EXPERIMENT 14 63
140 | device 62
141 | resistance 64
142 | resistance 65
143 | resistance 66
144 | EXPERIMENT 15 69
145 | device 67
146 | electrolyte_material 68
147 | power_density 70
148 | EXPERIMENT 16 73
149 | working_temperature 71
150 | fuel_used 72
151 | power_density 74
152 | power_density 75
153 | power_density 76
154 | EXPERIMENT 17 79
155 | time_of_operation 80
156 | device 81
157 | EXPERIMENT 18 83
158 | device 82
159 | power_density 84
160 | LINK same_experiment 31 34
161 | LINK thickness 36 35
162 | LINK same_experiment 34 38
163 | LINK same_experiment 38 49
164 | LINK coreference 11 12
165 | LINK coreference 11 13
166 | LINK coreference 27 28
167 | LINK coreference 29 30
168 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/sentences/PMC3793895.csv:
--------------------------------------------------------------------------------
1 | 1 0 0 234
2 | 2 0 235 264
3 | 3 0 265 353
4 | 4 0 354 370
5 | 5 0 371 888
6 | 6 0 889 1058
7 | 7 0 1059 1267
8 | 8 0 1268 1365
9 | 9 0 1366 1566
10 | 10 0 1567 2413
11 | 11 0 2414 2587
12 | 12 0 2588 3023
13 | 13 1 3024 3467
14 | 14 0 3468 3664
15 | 15 0 3665 3757
16 | 16 0 3758 3927
17 | 17 0 3928 4057
18 | 18 0 4058 4278
19 | 19 0 4279 4391
20 | 20 0 4392 4684
21 | 21 0 4685 4809
22 | 22 0 4810 5073
23 | 23 0 5074 5203
24 | 24 0 5204 5482
25 | 25 0 5483 5614
26 | 26 0 5615 5834
27 | 27 0 5835 6056
28 | 28 0 6057 6287
29 | 29 0 6288 6460
30 | 30 0 6461 6575
31 | 31 0 6576 6671
32 | 32 0 6672 6800
33 | 33 0 6801 7111
34 | 34 0 7112 7237
35 | 35 0 7238 7327
36 | 36 0 7328 7477
37 | 37 0 7478 7626
38 | 38 0 7627 7684
39 | 39 0 7685 7833
40 | 40 0 7834 7905
41 | 41 0 7906 7968
42 | 42 0 7969 8036
43 | 43 0 8037 8151
44 | 44 0 8152 8299
45 | 45 0 8300 8394
46 | 46 0 8395 8635
47 | 47 0 8636 8919
48 | 48 0 8920 9066
49 | 49 0 9067 9360
50 | 50 0 9361 9494
51 | 51 0 9495 9987
52 | 52 0 9988 10232
53 | 53 0 10233 10459
54 | 54 0 10460 10545
55 | 55 0 10546 10759
56 | 56 0 10760 11033
57 | 57 0 11034 11138
58 | 58 0 11139 11391
59 | 59 0 11392 11799
60 | 60 0 11800 12071
61 | 61 0 12072 12373
62 | 62 0 12374 12495
63 | 63 0 12496 12599
64 | 64 0 12600 12763
65 | 65 0 12764 12884
66 | 66 0 12885 12989
67 | 67 0 12990 13104
68 | 68 0 13105 13226
69 | 69 0 13227 13309
70 | 70 0 13310 13444
71 | 71 0 13445 13570
72 | 72 0 13571 13617
73 | 73 0 13618 13756
74 | 74 0 13757 14087
75 | 75 0 14088 14302
76 | 76 0 14303 14503
77 | 77 0 14504 14903
78 | 78 0 14904 15203
79 | 79 0 15204 15272
80 | 80 0 15273 15501
81 | 81 0 15502 15566
82 | 82 0 15567 15663
83 | 83 0 15664 16992
84 | 84 0 16993 17350
85 | 85 0 17351 17427
86 | 86 0 17428 17521
87 | 87 0 17522 17749
88 | 88 0 17750 17873
89 | 89 0 17874 18100
90 | 90 0 18101 18287
91 | 91 0 18288 18432
92 | 92 0 18433 18467
93 | 93 0 18468 18545
94 | 94 0 18546 18659
95 | 95 0 18660 18888
96 | 96 0 18889 18985
97 | 97 0 18986 19190
98 | 98 0 19191 19460
99 | 99 0 19461 19716
100 | 100 0 19717 19923
101 | 101 0 19924 20045
102 | 102 0 20046 20276
103 | 103 0 20277 20426
104 | 104 0 20427 20536
105 | 105 0 20537 20625
106 | 106 0 20626 20860
107 | 107 0 20861 21201
108 | 108 0 21202 21512
109 | 109 0 21513 21631
110 | 110 0 21632 21942
111 | 111 0 21943 22140
112 | 112 0 22141 22360
113 | 113 0 22361 22737
114 | 114 0 22738 22911
115 | 115 0 22912 23155
116 | 116 0 23156 23290
117 | 117 0 23291 23451
118 | 118 0 23452 23578
119 | 119 0 23579 24113
120 | 120 0 24114 24338
121 | 121 0 24339 24592
122 | 122 0 24593 24821
123 | 123 1 24822 25225
124 | 124 0 25226 25332
125 | 125 0 25333 25765
126 | 126 0 25766 25986
127 | 127 0 25987 26088
128 | 128 0 26089 26215
129 | 129 0 26216 26491
130 | 130 0 26492 26611
131 | 131 0 26612 26873
132 | 132 0 26874 26902
133 | 133 0 26903 27127
134 | 134 0 27128 27278
135 | 135 0 27279 27447
136 | 136 0 27448 27575
137 | 137 0 27576 27933
138 | 138 0 27934 28076
139 | 139 0 28077 28594
140 | 140 0 28595 28757
141 | 141 0 28758 28932
142 | 142 0 28933 29287
143 | 143 0 29288 29473
144 | 144 0 29474 29670
145 | 145 0 29671 29903
146 | 146 0 29904 30101
147 | 147 0 30102 30195
148 | 148 0 30196 30391
149 | 149 0 30392 30454
150 | 150 0 30455 30488
151 | 151 0 30489 30709
152 | 152 0 30710 30906
153 | 153 0 30907 31048
154 | 154 1 31049 31306
155 | 155 1 31307 31540
156 | 156 1 31541 31858
157 | 157 0 31859 32036
158 | 158 0 32037 32164
159 | 159 0 32165 32306
160 | 160 0 32307 32499
161 | 161 0 32500 32657
162 | 162 0 32658 32719
163 | 163 0 32720 32812
164 | 164 0 32813 32867
165 | 165 0 32868 33216
166 | 166 1 33217 33305
167 | 167 1 33306 33516
168 | 168 0 33517 33614
169 | 169 0 33615 33837
170 | 170 0 33838 34051
171 | 171 0 34052 34407
172 | 172 0 34408 34676
173 | 173 0 34677 34889
174 | 174 0 34890 35050
175 | 175 0 35051 35190
176 | 176 0 35191 35295
177 | 177 0 35296 35438
178 | 178 0 35439 35624
179 | 179 0 35625 36049
180 | 180 0 36050 36121
181 | 181 0 36122 36539
182 | 182 0 36540 36863
183 | 183 0 36864 37009
184 | 184 0 37010 37142
185 | 185 0 37143 37282
186 | 186 0 37283 37821
187 | 187 0 37822 38072
188 | 188 0 38073 38282
189 | 189 0 38283 38567
190 | 190 0 38568 38766
191 | 191 0 38767 39143
192 | 192 0 39144 39260
193 | 193 0 39261 39388
194 | 194 0 39389 39455
195 | 195 0 39456 39819
196 | 196 0 39820 39997
197 | 197 0 39998 40126
198 | 198 0 40127 40273
199 | 199 0 40274 40436
200 | 200 0 40437 40602
201 | 201 0 40603 40990
202 | 202 0 40991 41214
203 | 203 0 41215 41501
204 | 204 0 41502 41897
205 | 205 0 41898 42147
206 | 206 0 42148 42390
207 | 207 0 42391 42645
208 | 208 0 42646 42888
209 | 209 0 42889 43090
210 | 210 0 43091 43213
211 | 211 0 43214 43438
212 | 212 0 43439 43603
213 | 213 0 43604 43886
214 | 214 0 43887 44071
215 | 215 0 44072 44370
216 | 216 0 44371 44545
217 | 217 0 44546 44857
218 | 218 0 44858 45165
219 | 219 0 45166 45248
220 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC5456866.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 EXPERIMENT:previous_work 2 22 29
2 | SPAN 2 MATERIAL 2 78 92
3 | SPAN 3 EXPERIMENT:current_exp 3 24 30
4 | SPAN 4 EXPERIMENT:current_exp 4 8 20
5 | SPAN 5 MATERIAL 4 109 142
6 | SPAN 6 EXPERIMENT:current_exp 4 161 171
7 | SPAN 7 DEVICE 4 187 191
8 | SPAN 8 DEVICE 6 4 15
9 | SPAN 9 MATERIAL 6 26 44
10 | SPAN 10 MATERIAL 6 70 96
11 | SPAN 11 MATERIAL 6 98 102
12 | SPAN 12 MATERIAL 6 123 141
13 | SPAN 13 EXPERIMENT:current_exp 6 151 155
14 | SPAN 14 VALUE 6 183 186
15 | SPAN 15 VALUE 6 191 202
16 | SPAN 16 VALUE 6 206 212
17 | SPAN 17 MATERIAL 6 238 240
18 | SPAN 18 MATERIAL 6 253 256
19 | SPAN 19 MATERIAL 12 0 26
20 | SPAN 20 EXPERIMENT:general_info 12 49 53
21 | SPAN 21 DEVICE 12 74 81
22 | SPAN 22 MATERIAL 13 0 24
23 | SPAN 23 MATERIAL 13 26 30
24 | SPAN 24 MATERIAL 13 40 56
25 | SPAN 25 MATERIAL 13 58 61
26 | SPAN 26 EXPERIMENT:general_info 13 96 108
27 | SPAN 27 VALUE 13 191 203
28 | SPAN 28 EXPERIMENT:previous_work 21 47 54
29 | SPAN 29 MATERIAL 21 103 117
30 | SPAN 30 EXPERIMENT:current_exp 22 23 31
31 | SPAN 31 EXPERIMENT:current_exp 23 3 15
32 | SPAN 32 DEVICE 23 200 204
33 | SPAN 33 MATERIAL 23 211 213
34 | SPAN 34 MATERIAL 108 108 149
35 | SPAN 35 VALUE 111 35 41
36 | SPAN 36 MATERIAL 111 46 86
37 | SPAN 37 VALUE 111 95 99
38 | SPAN 38 VALUE 111 101 105
39 | SPAN 39 VALUE 111 111 122
40 | SPAN 40 MATERIAL 113 17 33
41 | SPAN 41 EXPERIMENT:current_exp 113 34 37
42 | SPAN 42 MATERIAL 113 96 104
43 | SPAN 43 MATERIAL 113 114 138
44 | SPAN 44 MATERIAL 113 140 144
45 | SPAN 45 EXPERIMENT:previous_work 113 146 153
46 | SPAN 46 VALUE 113 158 167
47 | SPAN 47 VALUE 113 171 177
48 | SPAN 48 MATERIAL 113 189 205
49 | SPAN 49 MATERIAL 113 207 210
50 | SPAN 50 MATERIAL 114 58 89
51 | SPAN 51 MATERIAL 114 111 115
52 | SPAN 52 MATERIAL 118 35 69
53 | SPAN 53 EXPERIMENT:current_exp 118 95 101
54 | SPAN 54 DEVICE 118 112 117
55 | SPAN 55 MATERIAL 118 124 145
56 | SPAN 56 VALUE 118 168 180
57 | SPAN 57 MATERIAL 118 181 185
58 | SPAN 58 EXPERIMENT:current_exp 119 12 22
59 | SPAN 59 VALUE 119 98 101
60 | SPAN 60 VALUE 119 106 112
61 | SPAN 61 DEVICE 119 126 131
62 | SPAN 62 MATERIAL 119 146 148
63 | SPAN 63 EXPERIMENT:current_exp 119 172 178
64 | SPAN 64 MATERIAL 119 182 185
65 | SPAN 65 EXPERIMENT:current_exp 120 28 37
66 | SPAN 66 DEVICE 120 45 50
67 | SPAN 67 MATERIAL 120 59 76
68 | SPAN 68 MATERIAL 120 81 99
69 | SPAN 69 VALUE 120 113 116
70 | SPAN 70 VALUE 120 121 132
71 | SPAN 71 VALUE 120 136 142
72 | SPAN 72 EXPERIMENT:previous_work 121 44 52
73 | SPAN 73 MATERIAL 121 57 65
74 | SPAN 74 VALUE 121 80 91
75 | SPAN 75 VALUE 121 95 101
76 | SPAN 76 MATERIAL 125 49 67
77 | SPAN 77 MATERIAL 125 107 111
78 | SPAN 78 VALUE 125 152 162
79 | SPAN 79 VALUE 125 166 172
80 | SPAN 80 EXPERIMENT:previous_work 125 177 185
81 | SPAN 81 MATERIAL 126 43 76
82 | SPAN 82 EXPERIMENT:current_exp 126 93 102
83 | SPAN 83 MATERIAL 133 66 70
84 | EXPERIMENT 1 1
85 | cathode_material 2
86 | EXPERIMENT 2 4
87 | cathode_material 5
88 | EXPERIMENT 3 6
89 | device 7
90 | EXPERIMENT 4 13
91 | device 8
92 | cathode_material 9
93 | electrolyte_material 10
94 | anode_material 12
95 | power_density 14
96 | power_density 15
97 | working_temperature 16
98 | fuel_used 17
99 | fuel_used 18
100 | EXPERIMENT 5 20
101 | cathode_material 19
102 | device 21
103 | EXPERIMENT 6 26
104 | cathode_material 22
105 | cathode_material 24
106 | working_temperature 27
107 | EXPERIMENT 7 28
108 | cathode_material 29
109 | EXPERIMENT 8 31
110 | device 32
111 | fuel_used 33
112 | EXPERIMENT 9 41
113 | cathode_material 40
114 | cathode_material 42
115 | EXPERIMENT 10 45
116 | cathode_material 43
117 | conductivity 46
118 | working_temperature 47
119 | cathode_material 48
120 | EXPERIMENT 11 53
121 | cathode_material 52
122 | device 54
123 | support_material 55
124 | electrolyte_material 57
125 | EXPERIMENT 12 58
126 | working_temperature 59
127 | working_temperature 60
128 | device 61
129 | fuel_used 62
130 | EXPERIMENT 13 63
131 | fuel_used 64
132 | EXPERIMENT 14 65
133 | device 66
134 | cathode_material 67
135 | cathode_material 68
136 | power_density 69
137 | power_density 70
138 | working_temperature 71
139 | EXPERIMENT 15 72
140 | power_density 74
141 | working_temperature 75
142 | EXPERIMENT 16 80
143 | cathode_material 76
144 | electrolyte_material 77
145 | power_density 78
146 | working_temperature 79
147 | EXPERIMENT 17 82
148 | cathode_material 81
149 | LINK experiment_variation 3 1
150 | LINK same_experiment 3 4
151 | LINK same_experiment 4 6
152 | LINK experiment_variation 26 20
153 | LINK experiment_variation 30 28
154 | LINK same_experiment 30 31
155 | LINK same_experiment 41 45
156 | LINK thickness 57 56
157 | LINK same_experiment 53 58
158 | LINK same_experiment 58 63
159 | LINK same_experiment 63 65
160 | LINK coreference 5 9
161 | LINK coreference 10 11
162 | LINK coreference 22 23
163 | LINK coreference 24 25
164 | LINK coreference 43 44
165 | LINK coreference 48 49
166 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC5457052.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 EXPERIMENT:current_exp 1 1092 1098
2 | SPAN 2 MATERIAL 1 1144 1159
3 | SPAN 3 DEVICE 1 1160 1181
4 | SPAN 4 EXPERIMENT:current_exp 2 32 41
5 | SPAN 5 EXPERIMENT:current_exp 2 80 86
6 | SPAN 6 VALUE 2 106 113
7 | SPAN 7 VALUE 2 117 123
8 | SPAN 8 MATERIAL 26 165 190
9 | SPAN 9 MATERIAL 26 192 196
10 | SPAN 10 MATERIAL 27 88 92
11 | SPAN 11 EXPERIMENT:current_exp 27 122 130
12 | SPAN 12 DEVICE 27 136 147
13 | SPAN 13 EXPERIMENT:current_exp 27 188 190
14 | SPAN 14 DEVICE 27 218 223
15 | SPAN 15 VALUE 27 225 234
16 | SPAN 16 VALUE 27 238 243
17 | SPAN 17 VALUE 27 248 254
18 | SPAN 18 MATERIAL 28 87 93
19 | SPAN 19 MATERIAL 28 94 100
20 | SPAN 20 MATERIAL 28 111 118
21 | SPAN 21 VALUE 28 141 147
22 | SPAN 22 VALUE 28 182 199
23 | SPAN 23 VALUE 28 203 208
24 | SPAN 24 EXPERIMENT:previous_work 28 214 222
25 | SPAN 25 EXPERIMENT:current_exp 31 22 25
26 | SPAN 26 MATERIAL 31 40 48
27 | SPAN 27 VALUE 31 97 103
28 | SPAN 28 VALUE 31 105 114
29 | SPAN 29 EXPERIMENT:current_exp 33 23 27
30 | SPAN 30 VALUE 33 76 86
31 | SPAN 31 VALUE 33 90 95
32 | SPAN 32 VALUE 33 100 106
33 | SPAN 33 DEVICE 40 27 33
34 | SPAN 34 EXPERIMENT:current_exp 40 34 43
35 | SPAN 35 VALUE 40 59 64
36 | SPAN 36 DEVICE 40 167 173
37 | SPAN 37 EXPERIMENT:current_exp 40 174 179
38 | SPAN 38 VALUE 40 209 217
39 | SPAN 39 VALUE 40 221 225
40 | SPAN 40 VALUE 40 231 236
41 | SPAN 41 EXPERIMENT:current_exp 40 244 251
42 | SPAN 42 VALUE 40 255 260
43 | SPAN 43 VALUE 40 266 272
44 | SPAN 44 DEVICE 41 97 102
45 | SPAN 45 EXPERIMENT:current_exp 41 103 107
46 | SPAN 46 VALUE 41 119 126
47 | SPAN 47 VALUE 41 140 147
48 | SPAN 48 VALUE 41 160 166
49 | SPAN 49 VALUE 49 65 74
50 | SPAN 50 EXPERIMENT:current_exp 49 79 87
51 | SPAN 51 VALUE 57 44 48
52 | SPAN 52 DEVICE 57 51 57
53 | SPAN 53 EXPERIMENT:current_exp 57 58 62
54 | SPAN 54 VALUE 57 63 68
55 | SPAN 55 VALUE 57 72 77
56 | SPAN 56 DEVICE 57 106 112
57 | SPAN 57 EXPERIMENT:current_exp 57 113 121
58 | SPAN 58 VALUE 57 125 130
59 | SPAN 59 VALUE 57 132 137
60 | SPAN 60 MATERIAL 119 14 29
61 | SPAN 61 DEVICE 119 30 34
62 | SPAN 62 MATERIAL 119 53 58
63 | SPAN 63 MATERIAL 119 91 98
64 | SPAN 64 MATERIAL 119 122 126
65 | SPAN 65 EXPERIMENT:current_exp 119 140 148
66 | SPAN 66 DEVICE 121 40 45
67 | SPAN 67 MATERIAL 121 54 58
68 | SPAN 68 EXPERIMENT:current_exp 121 85 93
69 | SPAN 69 VALUE 121 119 126
70 | SPAN 70 VALUE 121 130 136
71 | SPAN 71 VALUE 121 141 150
72 | SPAN 72 MATERIAL 122 24 27
73 | SPAN 73 DEVICE 122 56 60
74 | SPAN 74 EXPERIMENT:current_exp 122 70 78
75 | SPAN 75 VALUE 122 82 91
76 | SPAN 76 VALUE 122 106 111
77 | SPAN 77 VALUE 122 115 121
78 | SPAN 78 MATERIAL 124 20 23
79 | SPAN 79 EXPERIMENT:current_exp 124 68 76
80 | SPAN 80 VALUE 124 80 89
81 | SPAN 81 VALUE 124 94 98
82 | EXPERIMENT 1 1
83 | support_material 2
84 | device 3
85 | EXPERIMENT 2 5
86 | power_density 6
87 | working_temperature 7
88 | EXPERIMENT 3 11
89 | cathode_material 10
90 | device 12
91 | EXPERIMENT 4 13
92 | device 14
93 | current_density 15
94 | voltage 16
95 | working_temperature 17
96 | EXPERIMENT 5 24
97 | fuel_used 18
98 | fuel_used 19
99 | fuel_used 20
100 | working_temperature 21
101 | power_density 22
102 | voltage 23
103 | EXPERIMENT 6 25
104 | fuel_used 26
105 | working_temperature 27
106 | current_density 28
107 | EXPERIMENT 7 29
108 | current_density 30
109 | voltage 31
110 | working_temperature 32
111 | EXPERIMENT 8 34
112 | device 33
113 | time_of_operation 35
114 | EXPERIMENT 9 37
115 | device 36
116 | degradation_rate 38
117 | degradation_rate 39
118 | time_of_operation 40
119 | EXPERIMENT 10 41
120 | degradation_rate 42
121 | time_of_operation 43
122 | EXPERIMENT 11 45
123 | device 44
124 | open_circuit_voltage 46
125 | open_circuit_voltage 47
126 | working_temperature 48
127 | EXPERIMENT 12 50
128 | current_density 49
129 | EXPERIMENT 13 53
130 | device 52
131 | degradation_rate 54
132 | degradation_rate 55
133 | EXPERIMENT 14 57
134 | device 56
135 | degradation_rate 58
136 | degradation_rate 59
137 | EXPERIMENT 15 65
138 | support_material 60
139 | device 61
140 | support_material 62
141 | anode_material 63
142 | electrolyte_material 64
143 | EXPERIMENT 16 68
144 | device 66
145 | cathode_material 67
146 | power_density 69
147 | working_temperature 70
148 | current_density 71
149 | EXPERIMENT 17 74
150 | support_material 72
151 | device 73
152 | current_density 75
153 | time_of_operation 76
154 | working_temperature 77
155 | EXPERIMENT 18 79
156 | support_material 78
157 | current_density 80
158 | time_of_operation 81
159 | LINK same_experiment 1 4
160 | LINK same_experiment 4 5
161 | LINK same_experiment 11 13
162 | LINK same_experiment 25 29
163 | LINK experiment_variation 37 34
164 | LINK same_experiment 37 41
165 | LINK thickness 53 51
166 | LINK experiment_variation 57 53
167 | LINK same_experiment 65 68
168 | LINK same_experiment 65 68
169 | LINK experiment_variation 79 74
170 | LINK same_experiment 68 74
171 | LINK coreference 8 9
172 |
--------------------------------------------------------------------------------
/sofc-exp-corpus/annotations/frames/PMC6073263.csv:
--------------------------------------------------------------------------------
1 | SPAN 1 MATERIAL 2 105 120
2 | SPAN 2 MATERIAL 2 121 125
3 | SPAN 3 MATERIAL 5 123 133
4 | SPAN 4 EXPERIMENT:current_exp 5 168 180
5 | SPAN 5 VALUE 5 184 194
6 | SPAN 6 EXPERIMENT:current_exp 6 39 48
7 | SPAN 7 MATERIAL 6 58 72
8 | SPAN 8 VALUE 6 74 80
9 | SPAN 9 EXPERIMENT:current_exp 7 40 51
10 | SPAN 10 MATERIAL 7 61 75
11 | SPAN 11 VALUE 7 77 83
12 | SPAN 12 MATERIAL 8 13 17
13 | SPAN 13 EXPERIMENT:current_exp 8 136 140
14 | SPAN 14 DEVICE 8 163 185
15 | SPAN 15 DEVICE 8 187 192
16 | SPAN 16 MATERIAL 9 62 82
17 | SPAN 17 MATERIAL 9 84 87
18 | SPAN 18 EXPERIMENT:general_info 9 98 102
19 | SPAN 19 DEVICE 11 9 14
20 | SPAN 20 MATERIAL 11 21 24
21 | SPAN 21 EXPERIMENT:general_info 11 57 60
22 | SPAN 22 VALUE 11 83 94
23 | SPAN 23 EXPERIMENT:previous_work 14 28 33
24 | SPAN 24 MATERIAL 14 70 73
25 | SPAN 25 VALUE 14 143 158
26 | SPAN 26 VALUE 14 162 168
27 | SPAN 27 EXPERIMENT:previous_work 15 13 21
28 | SPAN 28 MATERIAL 15 31 38
29 | SPAN 29 MATERIAL 15 40 60
30 | SPAN 30 EXPERIMENT:previous_work 15 112 115
31 | SPAN 31 MATERIAL 15 170 173
32 | SPAN 32 VALUE 15 177 187
33 | SPAN 33 MATERIAL 16 22 30
34 | SPAN 34 MATERIAL 16 32 38
35 | SPAN 35 MATERIAL 16 40 44
36 | SPAN 36 MATERIAL 16 56 61
37 | SPAN 37 EXPERIMENT:general_info 16 83 87
38 | SPAN 38 DEVICE 16 112 121
39 | SPAN 39 EXPERIMENT:current_exp 32 143 149
40 | SPAN 40 VALUE 32 213 223
41 | SPAN 41 EXPERIMENT:current_exp 53 35 43
42 | SPAN 42 MATERIAL 53 52 56
43 | SPAN 43 VALUE 53 68 74
44 | SPAN 44 MATERIAL 53 77 81
45 | SPAN 45 VALUE 53 93 100
46 | SPAN 46 MATERIAL 53 107 111
47 | SPAN 47 VALUE 53 123 130
48 | SPAN 48 VALUE 53 138 155
49 | SPAN 49 VALUE 53 157 174
50 | SPAN 50 VALUE 53 180 197
51 | SPAN 51 VALUE 53 201 207
52 | SPAN 52 MATERIAL 55 37 41
53 | SPAN 53 VALUE 55 53 59
54 | SPAN 54 MATERIAL 55 77 81
55 | SPAN 55 VALUE 55 93 100
56 | SPAN 56 MATERIAL 55 138 142
57 | SPAN 57 VALUE 55 154 161
58 | SPAN 58 MATERIAL 70 43 57
59 | SPAN 59 EXPERIMENT:current_exp 70 133 139
60 | SPAN 60 VALUE 70 143 149
61 | SPAN 61 EXPERIMENT:current_exp 73 40 42
62 | SPAN 62 VALUE 73 43 50
63 | SPAN 63 MATERIAL 76 4 6
64 | SPAN 64 MATERIAL 76 7 9
65 | SPAN 65 DEVICE 76 10 19
66 | SPAN 66 EXPERIMENT:current_exp 76 52 58
67 | SPAN 67 VALUE 76 62 68
68 | SPAN 68 MATERIAL 76 77 91
69 | SPAN 69 VALUE 76 93 99
70 | SPAN 70 EXPERIMENT:current_exp 77 10 14
71 | SPAN 71 MATERIAL 77 24 38
72 | SPAN 72 VALUE 77 40 47
73 | SPAN 73 EXPERIMENT:current_exp 77 48 55
74 | SPAN 74 VALUE 77 85 91
75 | SPAN 75 MATERIAL 78 33 47
76 | SPAN 76 VALUE 78 49 55
77 | SPAN 77 EXPERIMENT:current_exp 78 57 59
78 | SPAN 78 VALUE 78 60 72
79 | SPAN 79 VALUE 78 86 92
80 | SPAN 80 VALUE 78 97 103
81 | SPAN 81 EXPERIMENT:general_info 79 39 47
82 | SPAN 82 MATERIAL 79 48 55
83 | SPAN 83 DEVICE 79 76 85
84 | SPAN 84 VALUE 79 91 106
85 | SPAN 85 VALUE 79 110 120
86 | SPAN 86 MATERIAL 86 33 47
87 | SPAN 87 VALUE 86 49 55
88 | SPAN 88 EXPERIMENT:current_exp 86 57 59
89 | SPAN 89 VALUE 86 60 72
90 | SPAN 90 VALUE 86 86 92
91 | SPAN 91 VALUE 86 97 103
92 | EXPERIMENT 1 4
93 | electrolyte_material 3
94 | working_temperature 5
95 | EXPERIMENT 2 6
96 | electrolyte_material 7
97 | working_temperature 8
98 | EXPERIMENT 3 9
99 | electrolyte_material 10
100 | working_temperature 11
101 | EXPERIMENT 4 13
102 | electrolyte_material 12
103 | device 14
104 | EXPERIMENT 5 18
105 | electrolyte_material 16
106 | EXPERIMENT 6 21
107 | device 19
108 | electrolyte_material 20
109 | working_temperature 22
110 | EXPERIMENT 7 23
111 | electrolyte_material 24
112 | power_density 25
113 | working_temperature 26
114 | EXPERIMENT 8 27
115 | electrolyte_material 28
116 | working_temperature 32
117 | EXPERIMENT 9 37
118 | device 38
119 | EXPERIMENT 10 39
120 | working_temperature 40
121 | EXPERIMENT 11 41
122 | electrolyte_material 42
123 | electrolyte_material 44
124 | electrolyte_material 46
125 | conductivity 48
126 | conductivity 49
127 | conductivity 50
128 | working_temperature 51
129 | EXPERIMENT 12 59
130 | electrolyte_material 58
131 | working_temperature 60
132 | EXPERIMENT 13 61
133 | open_circuit_voltage 62
134 | EXPERIMENT 14 66
135 | fuel_used 63
136 | fuel_used 64
137 | device 65
138 | working_temperature 67
139 | electrolyte_material 68
140 | EXPERIMENT 15 70
141 | electrolyte_material 71
142 | EXPERIMENT 16 73
143 | open_circuit_voltage 74
144 | EXPERIMENT 17 77
145 | electrolyte_material 75
146 | power_density 78
147 | working_temperature 80
148 | EXPERIMENT 18 81
149 | support_material 82
150 | device 83
151 | power_density 84
152 | working_temperature 85
153 | EXPERIMENT 19 88
154 | electrolyte_material 86
155 | power_density 89
156 | working_temperature 91
157 | LINK same_experiment 4 6
158 | LINK same_experiment 6 9
159 | LINK same_experiment 27 30
160 | LINK same_experiment 59 61
161 | LINK same_experiment 66 70
162 | LINK same_experiment 70 73
163 | LINK same_experiment 73 77
164 | LINK thickness 75 79
165 | LINK thickness 86 90
166 | LINK coreference 1 2
167 | LINK coreference 14 15
168 | LINK coreference 16 17
169 |
--------------------------------------------------------------------------------