├── code ├── source │ ├── __init__.py │ ├── evaluation │ │ ├── __init__.py │ │ └── ir_stats.py │ ├── model │ │ ├── sequenceTagger.py │ │ ├── attentionLayer.py │ │ ├── encoder.py │ │ └── sentenceClassifier.py │ ├── constants.py │ ├── dataHandling │ │ ├── sampling.py │ │ └── sofc_exp_utils.py │ ├── main_preprocess.py │ └── utils.py ├── README.md └── scripts │ ├── run_experiment_sentence_classification.sh │ ├── run_experiment_entity_typing.sh │ ├── run_experiment_slot_filling.sh │ └── run_cross_validation.sh ├── data ├── embeddings │ └── readme.txt └── models │ └── SciBERT │ └── readme.txt ├── sofc-exp-corpus ├── sofc_exp_guidelines.pdf └── annotations │ ├── frames │ ├── PMC4986314.csv │ ├── PMC4698742.csv │ ├── PMC3793895.csv │ ├── PMC5706185.csv │ ├── PMC3730159.csv │ ├── PMC4222441.csv │ ├── PMC6370853.csv │ ├── PMC6632008.csv │ ├── PMC5944822.csv │ ├── PMC4735809.csv │ ├── PMC4663492.csv │ ├── PMC6523084.csv │ ├── PMC5700654.csv │ ├── PMC6247067.csv │ ├── PMC5456601.csv │ ├── PMC6249295.csv │ ├── PMC6427619.csv │ ├── PMC6445146.csv │ ├── PMC5456866.csv │ ├── PMC5457052.csv │ └── PMC6073263.csv │ └── sentences │ ├── PMC4698742.csv │ ├── PMC4313086.csv │ ├── PMC6073263.csv │ ├── PMC6427619.csv │ ├── PMC4578433.csv │ ├── PMC4663492.csv │ ├── PMC6164086.csv │ ├── PMC6249295.csv │ ├── PMC5457052.csv │ ├── PMC5456866.csv │ ├── PMC3564701.csv │ ├── PMC6632008.csv │ ├── PMC4772004.csv │ ├── PMC5331335.csv │ ├── PMC5700654.csv │ ├── PMC4992832.csv │ ├── PMC5793538.csv │ ├── PMC4673446.csv │ ├── PMC5456869.csv │ ├── PMC6337513.csv │ ├── PMC5216129.csv │ ├── PMC4986314.csv │ ├── PMC5457246.csv │ ├── PMC5848893.csv │ ├── PMC6461657.csv │ ├── PMC6370853.csv │ ├── PMC6247067.csv │ ├── PMC5457058.csv │ ├── PMC6517467.csv │ ├── PMC5457196.csv │ ├── PMC5944822.csv │ ├── PMC6523084.csv │ ├── PMC4222441.csv │ └── PMC3793895.csv ├── sofcexp.yml └── .gitignore /code/source/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /code/source/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- 1 | See README.md at top level of this project! -------------------------------------------------------------------------------- /data/embeddings/readme.txt: -------------------------------------------------------------------------------- 1 | Place pre-trained embeddings here. -------------------------------------------------------------------------------- /sofc-exp-corpus/sofc_exp_guidelines.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/sofc-exp_textmining_resources/HEAD/sofc-exp-corpus/sofc_exp_guidelines.pdf -------------------------------------------------------------------------------- /data/models/SciBERT/readme.txt: -------------------------------------------------------------------------------- 1 | Place PyTorch SciBERT model files here using one folder per model, or change the value of the "pretrained_bert" command line argument to point to your model files. 
-------------------------------------------------------------------------------- /code/scripts/run_experiment_sentence_classification.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs one experiment 4 | 5 | echo "Current fold if given: $1" 6 | 7 | cd ../source 8 | 9 | source activate transformers # activate your conda environment here 10 | 11 | # For sentence classification BERT model: semantics of parameters see main.py 12 | python3 -u main.py -save_dir ../../models/bertLarge100epochs -subsampling 0.3 -batch_size 10 -task sentence \ 13 | -optim adamW -epochs 100 -lr 4e-7 -adam_epsilon 1e-8 -weight_decay 0 -num_cross_val_folds 5 \ 14 | -current_cross_val_fold $1 -model_type "BERT" -use_cuda -embeddings bert 15 | 16 | -------------------------------------------------------------------------------- /code/scripts/run_experiment_entity_typing.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs one experiment 4 | 5 | echo "Current fold if given: $1" 6 | 7 | cd ../source 8 | 9 | source activate sofcexp # activate your conda environment here 10 | 11 | # For entity extraction and typing: semantics of parameters see main.py 12 | python3 -u main.py -save_dir ../../models/scibertEntityTyping100epochs -subsampling 0.0 -batch_size 10 -task entity_typing \ 13 | -optim adamW -epochs 100 -lr 1e-5 -lr_bert 1e-5 -adam_epsilon 1e-8 -weight_decay 0 -num_cross_val_folds 5 \ 14 | -current_cross_val_fold $1 -model_type "BERT" -use_cuda -embeddings bert 15 | -------------------------------------------------------------------------------- /code/scripts/run_experiment_slot_filling.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs one experiment 4 | 5 | echo "Current fold if given: $1" 6 | 7 | cd ../source 8 | 9 | source activate transformers # activate your conda environment here 10 | 11 | # For 
slot filling SciBERT model: semantics of parameters see main.py 12 | python3 -u main.py -save_dir ../../models/scibertSlotFilling100epochs -subsampling 0.0 -batch_size 10 -task slot_typing \ 13 | -optim adamW -epochs 100 -lr 1e-5 -lr_bert 1e-5 -adam_epsilon 1e-8 -weight_decay 0 -num_cross_val_folds 5 \ 14 | -current_cross_val_fold $1 -model_type "BERT" -use_cuda -embeddings bert 15 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC4986314.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 EXPERIMENT:current_exp 1 823 831 2 | SPAN 2 EXPERIMENT:current_exp 2 24 33 3 | SPAN 3 MATERIAL 22 60 86 4 | SPAN 4 MATERIAL 22 88 91 5 | SPAN 5 EXPERIMENT:current_exp 22 97 101 6 | SPAN 6 MATERIAL 22 119 122 7 | SPAN 7 MATERIAL 125 8 11 8 | SPAN 8 EXPERIMENT:current_exp 125 65 75 9 | SPAN 9 VALUE 125 102 112 10 | SPAN 10 VALUE 125 117 127 11 | SPAN 11 MATERIAL 126 91 94 12 | SPAN 12 EXPERIMENT:current_exp 162 85 96 13 | SPAN 13 MATERIAL 162 145 148 14 | SPAN 14 MATERIAL 162 173 176 15 | EXPERIMENT 1 5 16 | electrolyte_material 3 17 | cathode_material 6 18 | EXPERIMENT 2 8 19 | cathode_material 7 20 | resistance 9 21 | resistance 10 22 | EXPERIMENT 3 12 23 | cathode_material 13 24 | electrolyte_material 14 25 | LINK coreference 3 4 26 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC4698742.csv: -------------------------------------------------------------------------------- 1 | 1 1 0 727 2 | 2 0 728 1057 3 | 3 0 1058 1278 4 | 4 1 1279 1530 5 | 5 0 1531 1956 6 | 6 0 1957 2091 7 | 7 0 2092 2166 8 | 8 0 2167 2338 9 | 9 0 2339 2453 10 | 10 0 2454 2541 11 | 11 0 2542 2717 12 | 12 0 2718 2791 13 | 13 0 2792 2925 14 | 14 0 2926 3013 15 | 15 0 3014 3293 16 | 16 1 3294 3615 17 | 17 0 3616 3985 18 | 18 0 3986 4239 19 | 19 0 4240 4466 20 | 20 0 4467 4667 21 | 21 0 4668 5031 22 | 22 0 5032 5305 23 | 23 
0 5306 5626 24 | 24 0 5627 5962 25 | 25 0 5963 6279 26 | 26 0 6280 6459 27 | 27 0 6460 6769 28 | 28 0 6770 6920 29 | 29 0 6921 7156 30 | 30 0 7157 7341 31 | 31 0 7342 7765 32 | 32 0 7766 7950 33 | 33 0 7951 8166 34 | 34 0 8167 8362 35 | 35 0 8363 8630 36 | 36 0 8631 8808 37 | 37 0 8809 8905 38 | 38 0 8906 9146 39 | 39 0 9147 9283 40 | 40 0 9284 9549 41 | 41 0 9550 9792 42 | 42 0 9793 9931 43 | 43 0 9932 10217 44 | 44 0 10218 10572 45 | 45 0 10573 10839 46 | 46 0 10840 11029 47 | 47 0 11030 11313 48 | 48 0 11314 11512 49 | 49 0 11513 11755 50 | 50 0 11756 11909 51 | 51 0 11910 12151 52 | 52 0 12152 12406 53 | 53 0 12407 12548 54 | 54 0 12549 12656 55 | 55 1 12657 12770 56 | 56 0 12771 12877 57 | 57 1 12878 13144 58 | 58 0 13145 13424 59 | 59 0 13425 13650 60 | 60 1 13651 13868 61 | 61 0 13869 13925 62 | 62 0 13926 13936 63 | 63 0 13937 14073 64 | 64 0 14074 14078 65 | 65 0 14079 14124 66 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC4313086.csv: -------------------------------------------------------------------------------- 1 | 1 1 0 594 2 | 2 0 595 722 3 | 3 0 723 824 4 | 4 1 825 1001 5 | 5 0 1002 1091 6 | 6 1 1092 1309 7 | 7 1 1310 1513 8 | 8 0 1514 1609 9 | 9 0 1610 1926 10 | 10 0 1927 2217 11 | 11 0 2218 2432 12 | 12 0 2433 2504 13 | 13 0 2505 2639 14 | 14 0 2640 2794 15 | 15 0 2795 2991 16 | 16 0 2992 3121 17 | 17 1 3122 3216 18 | 18 1 3217 3439 19 | 19 0 3440 3565 20 | 20 0 3566 3570 21 | 21 0 3571 3818 22 | 22 0 3819 3837 23 | 23 0 3838 3999 24 | 24 0 4000 4154 25 | 25 1 4155 4304 26 | 26 1 4305 4459 27 | 27 1 4460 4770 28 | 28 1 4771 4923 29 | 29 1 4924 5195 30 | 30 0 5196 5387 31 | 31 0 5388 5447 32 | 32 0 5448 5710 33 | 33 1 5711 5869 34 | 34 0 5870 6009 35 | 35 0 6010 6186 36 | 36 0 6187 6457 37 | 37 1 6458 6571 38 | 38 1 6572 6708 39 | 39 1 6709 6810 40 | 40 0 6811 6942 41 | 41 0 6943 7103 42 | 42 1 7104 7288 43 | 43 0 7289 7534 44 | 44 0 7535 7667 45 | 45 1 7668 7827 46 | 46 
0 7828 8002 47 | 47 0 8003 8231 48 | 48 0 8232 8403 49 | 49 0 8404 8649 50 | 50 0 8650 8861 51 | 51 0 8862 8967 52 | 52 0 8968 9104 53 | 53 0 9105 9109 54 | 54 0 9110 9189 55 | 55 0 9190 9562 56 | 56 0 9563 9607 57 | 57 0 9608 9863 58 | 58 0 9864 10081 59 | 59 0 10082 10153 60 | 60 0 10154 10266 61 | 61 0 10267 10381 62 | 62 0 10382 10386 63 | 63 0 10387 10434 64 | 64 0 10435 10523 65 | 65 0 10524 10611 66 | 66 0 10612 10737 67 | 67 0 10738 10976 68 | 68 0 10977 11065 69 | 69 0 11066 11198 70 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC4698742.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 DEVICE 1 487 491 2 | SPAN 2 MATERIAL 1 497 501 3 | SPAN 3 EXPERIMENT:current_exp 1 547 557 4 | SPAN 4 EXPERIMENT:current_exp 4 39 49 5 | SPAN 5 EXPERIMENT:current_exp 4 90 96 6 | SPAN 6 VALUE 4 111 121 7 | SPAN 7 VALUE 4 125 131 8 | SPAN 8 VALUE 4 137 142 9 | SPAN 9 MATERIAL 4 149 153 10 | SPAN 10 EXPERIMENT:current_exp 16 55 61 11 | SPAN 11 DEVICE 16 91 95 12 | SPAN 12 EXPERIMENT:current_exp 16 108 116 13 | SPAN 13 VALUE 55 3 9 14 | SPAN 14 VALUE 55 15 20 15 | SPAN 15 MATERIAL 55 27 30 16 | SPAN 16 EXPERIMENT:current_exp 55 70 73 17 | SPAN 17 VALUE 55 81 91 18 | SPAN 18 DEVICE 57 27 31 19 | SPAN 19 VALUE 57 37 42 20 | SPAN 20 MATERIAL 57 43 47 21 | SPAN 21 EXPERIMENT:future_work 57 54 61 22 | SPAN 22 VALUE 57 91 101 23 | SPAN 23 VALUE 57 105 111 24 | SPAN 24 EXPERIMENT:current_exp 60 39 49 25 | SPAN 25 VALUE 60 100 110 26 | SPAN 26 VALUE 60 114 120 27 | EXPERIMENT 1 3 28 | device 1 29 | electrolyte_material 2 30 | EXPERIMENT 2 5 31 | power_density 6 32 | working_temperature 7 33 | electrolyte_material 9 34 | EXPERIMENT 3 10 35 | device 11 36 | EXPERIMENT 4 16 37 | working_temperature 13 38 | electrolyte_material 15 39 | power_density 17 40 | EXPERIMENT 5 21 41 | device 18 42 | electrolyte_material 20 43 | power_density 22 44 | working_temperature 23 45 
| EXPERIMENT 6 24 46 | power_density 25 47 | working_temperature 26 48 | LINK same_experiment 4 5 49 | LINK thickness 9 8 50 | LINK same_experiment 10 12 51 | LINK thickness 15 14 52 | LINK thickness 20 19 53 | -------------------------------------------------------------------------------- /code/scripts/run_cross_validation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs 5-fold cross validation for SOFC-Exp corpus related experiments. 4 | # Paths assume that this script is run from within the scripts folder. 5 | 6 | for f in {1..5}; do 7 | : # no-op placeholder: bash rejects a loop body that contains only comments, so this keeps the script runnable until one run_experiment line below is uncommented 8 | # Uncomment one of the run_experiment... lines depending on which experiments you want to do. 9 | # Starts five runs in parallel - change if your computing environment allows only one GPU process at a time, 10 | # add call to scheduler, etc. 11 | 12 | # Variations of the models tested are configured in the respective run_experiments... file. 13 | 14 | # Experiment 15 | # run_experiment_sentence_classification.sh $f 16 | 17 | # Entity Types 18 | # run_experiment_entity_typing.sh $f 19 | 20 | # Slot filling 21 | # run_experiment_slot_filling.sh $f 22 | done 23 | 24 | # The above processes all write their results to prediction files. 25 | # Once they are done, collect results and compute performance statistics. 26 | # Use the file source/evaluation/evaluate_cross_validation.py with appropriate command line arguments. 
27 | 28 | # Examples: 29 | # sentence classification: 30 | # python -u evaluation/evaluate_cross_validation.py -predictions_dir DIRECTORY_WITH_MODEL_PREDICTIONS -eval_mode "multiclass" -num_labels 2 31 | # entity typing: 32 | # python -u evaluation/evaluate_cross_validation.py -predictions_dir DIRECTORY_WITH_MODEL_PREDICTIONS -eval_mode "conll" -task "entity_types" 33 | # slot typing: 34 | # python -u evaluation/evaluate_cross_validation.py -predictions_dir DIRECTORY_WITH_MODEL_PREDICTIONS -eval_mode "conll" -task "slot_types" 35 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6073263.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 616 2 | 2 0 617 791 3 | 3 0 792 1115 4 | 4 0 1116 1195 5 | 5 1 1196 1391 6 | 6 1 1392 1507 7 | 7 1 1508 1708 8 | 8 1 1709 1984 9 | 9 1 1985 2094 10 | 10 0 2095 2263 11 | 11 1 2264 2360 12 | 12 0 2361 2451 13 | 13 0 2452 2615 14 | 14 1 2616 2796 15 | 15 1 2797 2989 16 | 16 1 2990 3134 17 | 17 0 3135 3353 18 | 18 0 3354 3471 19 | 19 0 3472 3630 20 | 20 0 3631 3791 21 | 21 0 3792 3835 22 | 22 0 3836 3932 23 | 23 0 3933 4025 24 | 24 0 4026 4183 25 | 25 0 4184 4265 26 | 26 0 4266 4564 27 | 27 0 4565 4684 28 | 28 0 4685 4725 29 | 29 0 4726 4961 30 | 30 0 4962 5131 31 | 31 0 5132 5273 32 | 32 1 5274 5498 33 | 33 0 5499 5558 34 | 34 0 5559 5663 35 | 35 0 5664 5802 36 | 36 0 5803 5963 37 | 37 0 5964 6172 38 | 38 0 6173 6249 39 | 39 0 6250 6368 40 | 40 0 6369 6516 41 | 41 0 6517 6643 42 | 42 0 6644 6749 43 | 43 0 6750 6897 44 | 44 0 6898 7032 45 | 45 0 7033 7178 46 | 46 0 7179 7373 47 | 47 0 7374 7480 48 | 48 0 7481 7630 49 | 49 0 7631 7754 50 | 50 0 7755 7874 51 | 51 0 7875 8017 52 | 52 0 8018 8130 53 | 53 1 8131 8353 54 | 54 0 8354 8517 55 | 55 0 8518 8771 56 | 56 0 8772 8913 57 | 57 0 8914 9122 58 | 58 0 9123 9260 59 | 59 0 9261 9319 60 | 60 0 9320 9437 61 | 61 0 9438 9501 62 | 62 0 9502 9641 63 | 63 0 9642 9840 64 | 64 0 
9841 10007 65 | 65 0 10008 10171 66 | 66 0 10172 10253 67 | 67 0 10254 10355 68 | 68 0 10356 10532 69 | 69 0 10533 10609 70 | 70 1 10610 10781 71 | 71 0 10782 10905 72 | 72 0 10906 11005 73 | 73 1 11006 11105 74 | 74 0 11106 11166 75 | 75 0 11167 11280 76 | 76 1 11281 11403 77 | 77 1 11404 11548 78 | 78 1 11549 11653 79 | 79 1 11654 11883 80 | 80 0 11884 12043 81 | 81 0 12044 12218 82 | 82 0 12219 12370 83 | 83 0 12371 12573 84 | 84 0 12574 12675 85 | 85 0 12676 12814 86 | 86 1 12815 12919 87 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6427619.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 717 2 | 2 0 718 831 3 | 3 1 832 940 4 | 4 0 941 1098 5 | 5 1 1099 1299 6 | 6 0 1300 1460 7 | 7 0 1461 1626 8 | 8 0 1627 1714 9 | 9 0 1715 1894 10 | 10 0 1895 2008 11 | 11 0 2009 2170 12 | 12 0 2171 2293 13 | 13 0 2294 2373 14 | 14 0 2374 2439 15 | 15 0 2440 2574 16 | 16 0 2575 2693 17 | 17 0 2694 2809 18 | 18 0 2810 3026 19 | 19 1 3027 3197 20 | 20 1 3198 3323 21 | 21 0 3324 3488 22 | 22 0 3489 3574 23 | 23 0 3575 3678 24 | 24 0 3679 3789 25 | 25 0 3790 3938 26 | 26 0 3939 4150 27 | 27 0 4151 4283 28 | 28 0 4284 4459 29 | 29 0 4460 4555 30 | 30 0 4556 4623 31 | 31 0 4624 4731 32 | 32 0 4732 4814 33 | 33 0 4815 4941 34 | 34 0 4942 5024 35 | 35 0 5025 5100 36 | 36 0 5101 5194 37 | 37 0 5195 5412 38 | 38 0 5413 5497 39 | 39 0 5498 5687 40 | 40 0 5688 5963 41 | 41 0 5964 6152 42 | 42 0 6153 6315 43 | 43 0 6316 6418 44 | 44 0 6419 6654 45 | 45 0 6655 6807 46 | 46 1 6808 6864 47 | 47 0 6865 6989 48 | 48 0 6990 7176 49 | 49 0 7177 7293 50 | 50 0 7294 7439 51 | 51 0 7440 7537 52 | 52 0 7538 7646 53 | 53 0 7647 7766 54 | 54 0 7767 7895 55 | 55 0 7896 7990 56 | 56 0 7991 8179 57 | 57 0 8180 8391 58 | 58 0 8392 8478 59 | 59 0 8479 8637 60 | 60 0 8638 8703 61 | 61 0 8704 8882 62 | 62 0 8883 8947 63 | 63 0 8948 9026 64 | 64 0 9027 9146 65 | 65 0 9147 9283 66 | 66 0 
9284 9428 67 | 67 1 9429 9633 68 | 68 0 9634 9836 69 | 69 0 9837 9957 70 | 70 0 9958 10107 71 | 71 0 10108 10215 72 | 72 0 10216 10357 73 | 73 0 10358 10508 74 | 74 0 10509 10700 75 | 75 1 10701 10868 76 | 76 0 10869 11059 77 | 77 0 11060 11257 78 | 78 0 11258 11331 79 | 79 1 11332 11483 80 | 80 0 11484 11665 81 | 81 0 11666 11768 82 | 82 1 11769 12120 83 | 83 1 12121 12195 84 | 84 1 12196 12505 85 | 85 0 12506 12606 86 | 86 0 12607 12687 87 | 87 0 12688 12784 88 | 88 0 12785 12892 89 | 89 1 12893 13073 90 | 90 1 13074 13209 91 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC4578433.csv: -------------------------------------------------------------------------------- 1 | 1 1 0 1415 2 | 2 1 1416 2054 3 | 3 0 2055 2196 4 | 4 0 2197 2492 5 | 5 0 2493 2675 6 | 6 0 2676 2871 7 | 7 0 2872 3105 8 | 8 0 3106 3539 9 | 9 0 3540 3734 10 | 10 1 3735 3931 11 | 11 1 3932 4061 12 | 12 0 4062 4283 13 | 13 0 4284 4572 14 | 14 0 4573 5003 15 | 15 0 5004 5315 16 | 16 0 5316 5467 17 | 17 0 5468 5771 18 | 18 1 5772 6018 19 | 19 1 6019 6247 20 | 20 1 6248 6477 21 | 21 1 6478 6658 22 | 22 0 6659 6901 23 | 23 1 6902 7334 24 | 24 0 7335 7662 25 | 25 0 7663 7893 26 | 26 0 7894 8049 27 | 27 0 8050 8152 28 | 28 0 8153 8289 29 | 29 0 8290 8479 30 | 30 0 8480 8642 31 | 31 0 8643 8769 32 | 32 0 8770 9015 33 | 33 0 9016 9313 34 | 34 0 9314 9638 35 | 35 1 9639 9765 36 | 36 0 9766 10042 37 | 37 1 10043 10198 38 | 38 0 10199 10484 39 | 39 0 10485 10748 40 | 40 0 10749 10817 41 | 41 0 10818 11106 42 | 42 1 11107 11338 43 | 43 0 11339 11772 44 | 44 0 11773 12027 45 | 45 0 12028 12120 46 | 46 0 12121 12475 47 | 47 0 12476 12665 48 | 48 0 12666 12839 49 | 49 0 12840 12955 50 | 50 0 12956 13118 51 | 51 0 13119 13375 52 | 52 1 13376 13504 53 | 53 0 13505 13769 54 | 54 0 13770 14032 55 | 55 0 14033 14279 56 | 56 1 14280 14495 57 | 57 0 14496 14728 58 | 58 0 14729 14844 59 | 59 0 14845 14899 60 | 60 0 14900 14988 61 | 61 0 14989 
15206 62 | 62 0 15207 15284 63 | 63 0 15285 15386 64 | 64 0 15387 15444 65 | 65 0 15445 15513 66 | 66 0 15514 15591 67 | 67 0 15592 15668 68 | 68 0 15669 15813 69 | 69 0 15814 15901 70 | 70 0 15902 16001 71 | 71 0 16002 16152 72 | 72 0 16153 16282 73 | 73 0 16283 16388 74 | 74 0 16389 16505 75 | 75 1 16506 16702 76 | 76 0 16703 17060 77 | 77 0 17061 17298 78 | 78 0 17299 17411 79 | 79 0 17412 17579 80 | 80 0 17580 17653 81 | 81 0 17654 17771 82 | 82 0 17772 17940 83 | 83 0 17941 18068 84 | 84 0 18069 18279 85 | 85 0 18280 18408 86 | 86 1 18409 18590 87 | 87 0 18591 18753 88 | -------------------------------------------------------------------------------- /code/source/model/sequenceTagger.py: -------------------------------------------------------------------------------- 1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020). 2 | Copyright (c) 2020 Robert Bosch GmbH 3 | @author: Heike Adel 4 | @author: Annemarie Friedrich 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU Affero General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU Affero General Public License for more details. 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see https://www.gnu.org/licenses/. 
17 | """ 18 | 19 | 20 | import logging 21 | 22 | import torch 23 | from torch import nn 24 | 25 | from model.encoder import Encoder 26 | 27 | 28 | log = logging.getLogger("sequence_tagger") 29 | 30 | # SequenceTagger feeds the Encoder output through one linear layer with options['num_labels'] outputs. 31 | class SequenceTagger(nn.Module): 32 | def __init__(self, weights_matrix, embedding_options, options, device): 33 | # Builds the encoder and the output layer, then moves the module to the given device. 34 | super(SequenceTagger, self).__init__() 35 | 36 | print("Initializing sequence tagger ...") 37 | 38 | self.encoder = Encoder(weights_matrix, embedding_options, options, device) 39 | # Linear input size is 2 * options['hidden_size'] (presumably the encoder is bidirectional - confirm in model/encoder.py). 40 | self.linear = torch.nn.Linear(options['hidden_size'] * 2, options['num_labels']) 41 | 42 | self.to(device) 43 | 44 | 45 | def forward(self, tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths): 46 | sentence_tensor, output_lengths = self.encoder(tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths) 47 | # output_lengths is returned by the encoder but is not used below. 48 | features = self.linear(sentence_tensor) 49 | # Stop if any value in the output is NaN. NOTE(review): assert is skipped when Python runs with -O. 50 | assert(not torch.isnan(features).any()) 51 | return features 52 | 53 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC3793895.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 MATERIAL 12 295 319 2 | SPAN 2 DEVICE 13 286 314 3 | SPAN 3 EXPERIMENT:current_exp 13 342 349 4 | SPAN 4 MATERIAL 13 431 442 5 | SPAN 5 DEVICE 18 178 199 6 | SPAN 6 MATERIAL 59 140 173 7 | SPAN 7 MATERIAL 59 174 177 8 | SPAN 8 EXPERIMENT:current_exp 123 136 142 9 | SPAN 9 MATERIAL 123 148 149 10 | SPAN 10 EXPERIMENT:current_exp 123 150 152 11 | SPAN 11 MATERIAL 123 163 175 12 | SPAN 12 MATERIAL 123 186 198 13 | SPAN 13 VALUE 123 202 208 14 | SPAN 14 MATERIAL 131 103 137 15 | SPAN 15 DEVICE 154 12 17 16 | SPAN 16 EXPERIMENT:current_exp 154 23 31 17 | SPAN 17 DEVICE 154 117 121 18 | SPAN 18 MATERIAL 155 36 62 19 | SPAN 19 EXPERIMENT:general_info 155 70 72 20 | SPAN 20 DEVICE 155 95 99 21 | SPAN 21 EXPERIMENT:previous_work 155 113 121 22 | SPAN 22 VALUE 155 144 165 23 | 
VALUE 155 169 175 24 | SPAN 24 MATERIAL 156 68 102 25 | SPAN 25 EXPERIMENT:current_exp 156 204 211 26 | SPAN 26 VALUE 156 222 232 27 | SPAN 27 MATERIAL 156 241 267 28 | SPAN 28 VALUE 156 278 284 29 | SPAN 29 MATERIAL 156 290 304 30 | SPAN 30 EXPERIMENT:current_exp 166 11 19 31 | SPAN 31 MATERIAL 166 24 48 32 | SPAN 32 VALUE 166 55 77 33 | SPAN 33 VALUE 166 81 88 34 | SPAN 34 EXPERIMENT:current_exp 167 18 26 35 | SPAN 35 MATERIAL 167 31 64 36 | EXPERIMENT 1 3 37 | device 2 38 | cathode_material 4 39 | EXPERIMENT 2 8 40 | cathode_material 9 41 | EXPERIMENT 3 10 42 | fuel_used 11 43 | fuel_used 12 44 | working_temperature 13 45 | EXPERIMENT 4 16 46 | device 15 47 | device 17 48 | EXPERIMENT 5 19 49 | cathode_material 18 50 | device 20 51 | EXPERIMENT 6 21 52 | resistance 22 53 | working_temperature 23 54 | EXPERIMENT 7 25 55 | resistance 26 56 | cathode_material 27 57 | working_temperature 28 58 | electrolyte_material 29 59 | EXPERIMENT 8 30 60 | cathode_material 31 61 | resistance 32 62 | working_temperature 33 63 | EXPERIMENT 9 34 64 | cathode_material 35 65 | LINK same_experiment 8 10 66 | LINK experiment_variation 25 21 67 | LINK experiment_variation 34 30 68 | LINK coreference 1 4 69 | LINK coreference 6 7 70 | -------------------------------------------------------------------------------- /sofcexp.yml: -------------------------------------------------------------------------------- 1 | name: sofcexp 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - blas=1.0=mkl 7 | - ca-certificates=2020.6.24=0 8 | - certifi=2020.6.20=py38_0 9 | - cudatoolkit=10.2.89=hfd86e86_1 10 | - freetype=2.10.2=h5ab3b9f_0 11 | - intel-openmp=2020.1=217 12 | - joblib=0.15.1=py_0 13 | - jpeg=9b=habf39ab_1 14 | - ld_impl_linux-64=2.33.1=h53a641e_7 15 | - libedit=3.1.20191231=h7b6447c_0 16 | - libffi=3.3=he6710b0_1 17 | - libgcc-ng=9.1.0=hdf63c60_0 18 | - libgfortran-ng=7.3.0=hdf63c60_0 19 | - libpng=1.6.37=hbc83047_0 20 | - libstdcxx-ng=9.1.0=hdf63c60_0 21 | - 
libtiff=4.1.0=h2733197_1 22 | - lz4-c=1.9.2=he6710b0_0 23 | - mkl=2019.4=243 24 | - mkl-service=2.3.0=py38he904b0f_0 25 | - mkl_fft=1.1.0=py38h23d657b_0 26 | - mkl_random=1.1.0=py38h962f231_0 27 | - ncurses=6.2=he6710b0_1 28 | - ninja=1.9.0=py38hfd86e86_0 29 | - numpy=1.18.5=py38ha1c710e_0 30 | - numpy-base=1.18.5=py38hde5b4d6_0 31 | - olefile=0.46=py_0 32 | - openssl=1.1.1g=h7b6447c_0 33 | - pillow=7.1.2=py38hb39fc2d_0 34 | - pip=20.1.1=py38_1 35 | - python=3.8.3=hcff3b4d_0 36 | - readline=8.0=h7b6447c_0 37 | - scikit-learn=0.23.1=py38h423224d_0 38 | - scipy=1.5.0=py38h0b6359f_0 39 | - setuptools=47.3.1=py38_0 40 | - six=1.15.0=py_0 41 | - sqlite=3.32.3=h62c20be_0 42 | - threadpoolctl=2.1.0=pyh5ca1d4c_0 43 | - tk=8.6.10=hbc83047_0 44 | - wheel=0.34.2=py38_0 45 | - xz=5.2.5=h7b6447c_0 46 | - zlib=1.2.11=h7b6447c_3 47 | - zstd=1.4.4=h0b5b093_3 48 | - pytorch=1.5.1=py3.8_cuda10.2.89_cudnn7.6.5_0 49 | - torchvision=0.6.1=py38_cu102 50 | - pip: 51 | - chardet==3.0.4 52 | - click==7.1.2 53 | - filelock==3.0.12 54 | - idna==2.10 55 | - packaging==20.4 56 | - pyparsing==2.4.7 57 | - regex==2020.6.8 58 | - requests==2.24.0 59 | - sacremoses==0.0.43 60 | - sentencepiece==0.1.92 61 | - tokenizers==0.8.0rc4 62 | - torch==1.5.1 63 | - tqdm==4.47.0 64 | - transformers==3.0.0 65 | - urllib3==1.25.9 66 | prefix: INSERT-YOUR-CONDA-PATH/envs/sofcexp 67 | 68 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC5706185.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 EXPERIMENT:general_info 376 39 46 2 | SPAN 2 DEVICE 376 68 115 3 | SPAN 3 DEVICE 376 117 122 4 | SPAN 4 EXPERIMENT:previous_work 379 23 34 5 | SPAN 5 MATERIAL 379 35 64 6 | SPAN 6 MATERIAL 379 66 71 7 | SPAN 7 EXPERIMENT:previous_work 379 107 115 8 | SPAN 8 DEVICE 379 147 152 9 | SPAN 9 MATERIAL 380 4 9 10 | SPAN 10 EXPERIMENT:previous_work 380 18 29 11 | SPAN 11 MATERIAL 380 33 47 12 | SPAN 12 MATERIAL 
380 49 52 13 | SPAN 13 MATERIAL 381 22 31 14 | SPAN 14 EXPERIMENT:previous_work 381 41 44 15 | SPAN 15 VALUE 381 120 131 16 | SPAN 16 VALUE 381 136 141 17 | SPAN 17 VALUE 381 145 151 18 | SPAN 18 EXPERIMENT:previous_work 382 15 20 19 | SPAN 19 MATERIAL 382 30 39 20 | SPAN 20 EXPERIMENT:previous_work 382 58 61 21 | SPAN 21 DEVICE 382 110 140 22 | SPAN 22 MATERIAL 383 29 40 23 | SPAN 23 MATERIAL 383 42 46 24 | SPAN 24 MATERIAL 385 53 57 25 | SPAN 25 EXPERIMENT:previous_work 386 33 38 26 | SPAN 26 DEVICE 386 75 104 27 | SPAN 27 MATERIAL 387 100 133 28 | SPAN 28 MATERIAL 387 135 140 29 | SPAN 29 DEVICE 387 160 195 30 | SPAN 30 EXPERIMENT:previous_work 387 206 218 31 | SPAN 31 EXPERIMENT:previous_work 388 130 136 32 | SPAN 32 VALUE 388 159 171 33 | EXPERIMENT 1 1 34 | device 2 35 | EXPERIMENT 2 4 36 | cathode_material 5 37 | EXPERIMENT 3 7 38 | device 8 39 | EXPERIMENT 4 10 40 | cathode_material 9 41 | cathode_material 11 42 | EXPERIMENT 5 14 43 | cathode_material 13 44 | current_density 15 45 | time_of_operation 16 46 | working_temperature 17 47 | EXPERIMENT 6 18 48 | cathode_material 19 49 | EXPERIMENT 7 20 50 | device 21 51 | EXPERIMENT 8 25 52 | cathode_material 24 53 | device 26 54 | EXPERIMENT 9 30 55 | electrolyte_material 27 56 | device 29 57 | EXPERIMENT 10 31 58 | open_circuit_voltage 32 59 | LINK same_experiment 4 7 60 | LINK same_experiment 7 10 61 | LINK same_experiment 10 14 62 | LINK same_experiment 14 18 63 | LINK same_experiment 18 20 64 | LINK same_experiment 30 31 65 | LINK coreference 2 3 66 | LINK coreference 5 6 67 | LINK coreference 11 12 68 | LINK coreference 22 23 69 | LINK coreference 27 28 70 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC3730159.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 DEVICE 162 29 34 2 | SPAN 2 MATERIAL 162 44 46 3 | SPAN 3 MATERIAL 162 56 59 4 | SPAN 4 MATERIAL 162 60 62 5 | SPAN 5 MATERIAL 162 
75 77 6 | SPAN 6 EXPERIMENT:previous_work 162 97 105 7 | SPAN 7 EXPERIMENT:previous_work 162 110 122 8 | SPAN 8 DEVICE 172 4 8 9 | SPAN 9 MATERIAL 172 22 30 10 | SPAN 10 MATERIAL 172 33 36 11 | SPAN 11 MATERIAL 172 38 40 12 | SPAN 12 EXPERIMENT:previous_work 172 65 77 13 | SPAN 13 EXPERIMENT:previous_work 173 75 83 14 | SPAN 14 MATERIAL 173 110 118 15 | SPAN 15 MATERIAL 173 121 124 16 | SPAN 16 MATERIAL 173 126 128 17 | SPAN 17 EXPERIMENT:previous_work 174 11 17 18 | SPAN 18 MATERIAL 174 27 30 19 | SPAN 19 VALUE 174 69 79 20 | SPAN 20 MATERIAL 174 111 116 21 | SPAN 21 DEVICE 174 129 133 22 | SPAN 22 EXPERIMENT:previous_work 174 134 144 23 | SPAN 23 MATERIAL 174 151 154 24 | SPAN 24 VALUE 174 181 191 25 | SPAN 25 MATERIAL 174 198 203 26 | SPAN 26 DEVICE 175 4 8 27 | SPAN 27 EXPERIMENT:previous_work 175 16 24 28 | SPAN 28 MATERIAL 175 28 43 29 | SPAN 29 MATERIAL 175 67 99 30 | SPAN 30 EXPERIMENT:previous_work 175 125 137 31 | SPAN 31 DEVICE 176 4 8 32 | SPAN 32 MATERIAL 176 59 74 33 | SPAN 33 MATERIAL 176 76 90 34 | SPAN 34 MATERIAL 176 100 105 35 | SPAN 35 DEVICE 177 58 62 36 | SPAN 36 EXPERIMENT:previous_work 177 74 83 37 | SPAN 37 VALUE 177 89 94 38 | EXPERIMENT 1 6 39 | device 1 40 | cathode_material 2 41 | electrolyte_material 3 42 | anode_material 4 43 | fuel_used 5 44 | EXPERIMENT 2 12 45 | device 8 46 | cathode_material 9 47 | cathode_material 10 48 | cathode_material 11 49 | EXPERIMENT 3 13 50 | cathode_material 14 51 | cathode_material 15 52 | cathode_material 16 53 | EXPERIMENT 4 17 54 | fuel_used 18 55 | current_density 19 56 | fuel_used 20 57 | EXPERIMENT 5 22 58 | device 21 59 | fuel_used 23 60 | current_density 24 61 | fuel_used 25 62 | EXPERIMENT 6 27 63 | device 26 64 | electrolyte_material 28 65 | cathode_material 29 66 | EXPERIMENT 7 36 67 | device 35 68 | voltage 37 69 | LINK same_experiment 6 7 70 | LINK same_experiment 12 13 71 | LINK experiment_variation 22 17 72 | LINK same_experiment 13 17 73 | LINK same_experiment 27 30 74 | LINK 
same_experiment 30 36 75 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC4663492.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 672 2 | 2 0 673 782 3 | 3 0 783 953 4 | 4 1 954 1168 5 | 5 0 1169 1309 6 | 6 0 1310 1466 7 | 7 1 1467 1826 8 | 8 1 1827 1989 9 | 9 0 1990 2100 10 | 10 0 2101 2296 11 | 11 0 2297 2487 12 | 12 0 2488 2551 13 | 13 0 2552 2680 14 | 14 0 2681 2805 15 | 15 0 2806 2884 16 | 16 0 2885 3002 17 | 17 0 3003 3143 18 | 18 0 3144 3358 19 | 19 0 3359 3597 20 | 20 0 3598 3726 21 | 21 1 3727 3865 22 | 22 0 3866 4002 23 | 23 0 4003 4182 24 | 24 0 4183 4343 25 | 25 0 4344 4528 26 | 26 0 4529 4661 27 | 27 0 4662 4794 28 | 28 0 4795 4943 29 | 29 0 4944 5142 30 | 30 0 5143 5242 31 | 31 0 5243 5430 32 | 32 0 5431 5659 33 | 33 0 5660 5814 34 | 34 0 5815 6086 35 | 35 0 6087 6285 36 | 36 0 6286 6562 37 | 37 0 6563 6639 38 | 38 0 6640 6848 39 | 39 0 6849 6958 40 | 40 0 6959 7219 41 | 41 1 7220 7432 42 | 42 0 7433 7513 43 | 43 1 7514 7720 44 | 44 0 7721 7804 45 | 45 1 7805 7919 46 | 46 1 7920 8195 47 | 47 1 8196 8395 48 | 48 0 8396 8549 49 | 49 0 8550 8746 50 | 50 0 8747 8965 51 | 51 0 8966 9089 52 | 52 0 9090 9350 53 | 53 0 9351 9639 54 | 54 0 9640 9758 55 | 55 0 9759 9920 56 | 56 0 9921 10011 57 | 57 0 10012 10086 58 | 58 0 10087 10224 59 | 59 0 10225 10532 60 | 60 0 10533 10737 61 | 61 0 10738 10877 62 | 62 0 10878 11081 63 | 63 0 11082 11288 64 | 64 0 11289 11412 65 | 65 0 11413 11587 66 | 66 0 11588 11765 67 | 67 0 11766 11968 68 | 68 0 11969 12170 69 | 69 0 12171 12347 70 | 70 0 12348 12509 71 | 71 0 12510 12659 72 | 72 0 12660 12861 73 | 73 0 12862 12981 74 | 74 0 12982 13322 75 | 75 0 13323 13499 76 | 76 0 13500 13604 77 | 77 0 13605 13830 78 | 78 0 13831 13999 79 | 79 0 14000 14164 80 | 80 0 14165 14302 81 | 81 0 14303 14422 82 | 82 0 14423 14599 83 | 83 0 14600 14767 84 | 84 0 14768 14935 85 | 85 1 14936 15057 86 | 86 0 15058 15197 
87 | 87 0 15198 15303 88 | 88 0 15304 15404 89 | 89 0 15405 15488 90 | 90 0 15489 15603 91 | 91 0 15604 15702 92 | 92 0 15703 15777 93 | 93 0 15778 15834 94 | 94 0 15835 15893 95 | 95 0 15894 16050 96 | 96 0 16051 16216 97 | 97 0 16217 16334 98 | 98 0 16335 16520 99 | 99 0 16521 16704 100 | 100 0 16705 16814 101 | 101 0 16815 16883 102 | 102 0 16884 17000 103 | 103 0 17001 17005 104 | 104 0 17006 17051 105 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC4222441.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 MATERIAL 1 652 657 2 | SPAN 2 EXPERIMENT:current_exp 1 695 702 3 | SPAN 3 DEVICE 1 716 738 4 | SPAN 4 DEVICE 2 4 9 5 | SPAN 5 EXPERIMENT:current_exp 2 20 28 6 | SPAN 6 MATERIAL 2 32 40 7 | SPAN 7 DEVICE 8 30 52 8 | SPAN 8 DEVICE 8 54 59 9 | SPAN 9 DEVICE 9 0 4 10 | SPAN 10 EXPERIMENT:general_info 9 5 8 11 | SPAN 11 MATERIAL 9 21 29 12 | SPAN 12 MATERIAL 9 60 71 13 | SPAN 13 DEVICE 20 56 61 14 | SPAN 14 EXPERIMENT:future_work 20 73 80 15 | SPAN 15 MATERIAL 20 114 125 16 | SPAN 16 MATERIAL 20 141 147 17 | SPAN 17 MATERIAL 20 152 164 18 | SPAN 18 EXPERIMENT:general_info 21 21 26 19 | SPAN 19 MATERIAL 21 46 51 20 | SPAN 20 MATERIAL 21 86 112 21 | SPAN 21 MATERIAL 21 114 117 22 | SPAN 22 EXPERIMENT:current_exp 37 7 12 23 | SPAN 23 DEVICE 37 55 59 24 | SPAN 24 MATERIAL 37 107 109 25 | SPAN 25 VALUE 37 121 132 26 | SPAN 26 VALUE 37 172 177 27 | SPAN 27 EXPERIMENT:current_exp 39 18 26 28 | SPAN 28 DEVICE 39 35 40 29 | SPAN 29 VALUE 39 71 88 30 | SPAN 30 VALUE 44 23 34 31 | SPAN 31 VALUE 45 393 420 32 | SPAN 32 DEVICE 52 63 67 33 | SPAN 33 EXPERIMENT:current_exp 52 73 82 34 | SPAN 34 VALUE 52 129 140 35 | SPAN 35 VALUE 52 142 150 36 | SPAN 36 VALUE 52 197 212 37 | SPAN 37 VALUE 52 214 223 38 | SPAN 38 EXPERIMENT:previous_work 165 36 44 39 | SPAN 39 MATERIAL 165 45 47 40 | SPAN 40 MATERIAL 165 63 65 41 | SPAN 41 MATERIAL 165 66 69 42 | SPAN 42 
MATERIAL 165 74 76 43 | SPAN 43 MATERIAL 165 77 80 44 | EXPERIMENT 1 2 45 | anode_material 1 46 | device 3 47 | EXPERIMENT 2 5 48 | device 4 49 | fuel_used 6 50 | EXPERIMENT 3 10 51 | device 9 52 | fuel_used 11 53 | fuel_used 12 54 | EXPERIMENT 4 14 55 | device 13 56 | fuel_used 15 57 | fuel_used 16 58 | fuel_used 17 59 | EXPERIMENT 5 18 60 | anode_material 19 61 | anode_material 20 62 | EXPERIMENT 6 22 63 | device 23 64 | fuel_used 24 65 | current_density 25 66 | time_of_operation 26 67 | EXPERIMENT 7 27 68 | device 28 69 | current_density 29 70 | EXPERIMENT 8 33 71 | device 32 72 | current_density 34 73 | time_of_operation 35 74 | current_density 36 75 | time_of_operation 37 76 | EXPERIMENT 9 38 77 | anode_material 39 78 | anode_material 40 79 | anode_material 41 80 | anode_material 42 81 | anode_material 43 82 | LINK same_experiment 2 5 83 | LINK coreference 7 8 84 | LINK coreference 7 9 85 | LINK coreference 20 21 86 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6164086.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 774 2 | 2 1 775 1081 3 | 3 1 1082 1260 4 | 4 0 1261 1498 5 | 5 1 1499 1926 6 | 6 0 1927 2020 7 | 7 0 2021 2166 8 | 8 0 2167 2258 9 | 9 0 2259 2329 10 | 10 0 2330 2460 11 | 11 0 2461 2722 12 | 12 0 2723 2962 13 | 13 0 2963 3187 14 | 14 0 3188 3321 15 | 15 0 3322 3428 16 | 16 0 3429 3558 17 | 17 0 3559 3680 18 | 18 0 3681 3888 19 | 19 0 3889 4078 20 | 20 0 4079 4254 21 | 21 0 4255 4459 22 | 22 0 4460 4538 23 | 23 1 4539 4706 24 | 24 1 4707 4934 25 | 25 1 4935 5186 26 | 26 0 5187 5398 27 | 27 0 5399 5448 28 | 28 0 5449 5626 29 | 29 0 5627 5685 30 | 30 0 5686 5802 31 | 31 1 5803 6084 32 | 32 0 6085 6235 33 | 33 0 6236 6253 34 | 34 0 6254 6413 35 | 35 0 6414 6706 36 | 36 0 6707 6842 37 | 37 0 6843 6895 38 | 38 0 6896 7143 39 | 39 0 7144 7247 40 | 40 0 7248 7370 41 | 41 0 7371 7533 42 | 42 0 7534 7827 43 | 43 0 7828 8038 44 | 44 
0 8039 8116 45 | 45 0 8117 8256 46 | 46 0 8257 8383 47 | 47 0 8384 8463 48 | 48 0 8464 8643 49 | 49 0 8644 8836 50 | 50 0 8837 9032 51 | 51 0 9033 9175 52 | 52 0 9176 9299 53 | 53 0 9300 9484 54 | 54 0 9485 9565 55 | 55 0 9566 9716 56 | 56 0 9717 9949 57 | 57 0 9950 10095 58 | 58 0 10096 10157 59 | 59 0 10158 10261 60 | 60 0 10262 10438 61 | 61 0 10439 10582 62 | 62 0 10583 10686 63 | 63 0 10687 10792 64 | 64 0 10793 10897 65 | 65 1 10898 11016 66 | 66 1 11017 11119 67 | 67 0 11120 11209 68 | 68 0 11210 11400 69 | 69 0 11401 11530 70 | 70 0 11531 11594 71 | 71 0 11595 11672 72 | 72 1 11673 11856 73 | 73 1 11857 11894 74 | 74 0 11895 12093 75 | 75 0 12094 12225 76 | 76 0 12226 12368 77 | 77 0 12369 12510 78 | 78 0 12511 12580 79 | 79 0 12581 12660 80 | 80 0 12661 12918 81 | 81 0 12919 13010 82 | 82 0 13011 13252 83 | 83 0 13253 13312 84 | 84 0 13313 13430 85 | 85 0 13431 13695 86 | 86 0 13696 13817 87 | 87 0 13818 14155 88 | 88 0 14156 14312 89 | 89 0 14313 14495 90 | 90 0 14496 14648 91 | 91 0 14649 14775 92 | 92 0 14776 14851 93 | 93 0 14852 14966 94 | 94 0 14967 15096 95 | 95 0 15097 15179 96 | 96 0 15180 15350 97 | 97 0 15351 15522 98 | 98 0 15523 15583 99 | 99 0 15584 15759 100 | 100 0 15760 16000 101 | 101 0 16001 16101 102 | 102 1 16102 16290 103 | 103 0 16291 16531 104 | 104 1 16532 16710 105 | 105 0 16711 16808 106 | 106 1 16809 16932 107 | 107 1 16933 17097 108 | 108 0 17098 17260 109 | 109 0 17261 17369 110 | 110 0 17370 17525 111 | 111 0 17526 17647 112 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | 
pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /code/source/constants.py: -------------------------------------------------------------------------------- 1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020). 2 | Copyright (c) 2020 Robert Bosch GmbH 3 | @author: Heike Adel 4 | @author: Annemarie Friedrich 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU Affero General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU Affero General Public License for more details. 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 
# Reserved vocabulary indices: index 0 is padding, index 1 is out-of-vocabulary.
PAD_IDX = 0
OOV_IDX = 1

# Sentinel tokens. They must be non-empty and pairwise distinct, otherwise they
# collide with each other (and with a genuinely empty token) in any
# token-to-index or tag-to-index mapping.
# NOTE(review): the angle-bracket spellings are the conventional ones; confirm
# against any stored vocabularies or checkpoints that depend on the exact strings.
PAD_TOK = "<PAD>"
OOV_TOK = "<OOV>"

START_TAG = "<START>"
STOP_TAG = "<STOP>"

# for coarse-grained concepts
entity2index = {'O': 0, 'B-DEVICE': 1, 'B-EXPERIMENT': 2, 'B-MATERIAL': 3, 'B-VALUE': 4,
                'I-DEVICE': 5, 'I-EXPERIMENT': 6, 'I-MATERIAL': 7, 'I-VALUE': 8}
index2entity = {index: label for label, index in entity2index.items()}
index2entity[-1] = "-"  # for nicer debugging output

# for fine-grained experiment slots
# Several labels at the end ("none", "SAME_EXPERIMENT", the interconnect
# materials) are deliberately mapped onto index 0, i.e. onto 'O'.
slot2index = {'O': 0, "B-conductivity": 1, "I-conductivity": 2, "B-current_density": 3, "I-current_density": 4, "B-degradation_rate": 5, "I-degradation_rate": 6,
              "B-device": 7, "I-device": 8, "B-experiment_evoking_word": 9, "I-experiment_evoking_word": 10, "B-fuel_used": 11, "I-fuel_used": 12, "B-open_circuit_voltage": 13,
              "I-open_circuit_voltage": 14, "B-power_density": 15, "I-power_density": 16, "B-resistance": 17, "I-resistance": 18, "B-thickness": 19, "I-thickness": 20, "B-time_of_operation": 21,
              "I-time_of_operation": 22, "B-voltage": 23, "I-voltage": 24, "B-working_temperature": 25, "I-working_temperature": 26, "B-anode_material": 27, "B-cathode_material": 28,
              "B-electrolyte_material": 29, "B-interlayer_material": 30, "I-anode_material": 31, "I-cathode_material": 32, "I-electrolyte_material": 33,
              "I-interlayer_material": 34, "B-support_material": 35, "I-support_material": 36, "none": 0, "SAME_EXPERIMENT": 0, "B-interconnect_material": 0, "I-interconnect_material": 0}
index2slot = {index: label for label, index in slot2index.items()}
# Index 0 is shared by several labels (see above); the inverse map must report 'O'.
index2slot[0] = "O"

RANDOM_SEED = 300
EXPERIMENT:current_exp 104 193 202 6 | SPAN 6 EXPERIMENT:current_exp 104 207 215 7 | SPAN 7 MATERIAL 104 231 234 8 | SPAN 8 DEVICE 105 4 14 9 | SPAN 9 MATERIAL 105 39 49 10 | SPAN 10 MATERIAL 105 52 55 11 | SPAN 11 MATERIAL 105 58 62 12 | SPAN 12 MATERIAL 105 65 74 13 | SPAN 13 EXPERIMENT:current_exp 105 80 86 14 | SPAN 14 MATERIAL 105 101 103 15 | SPAN 15 MATERIAL 105 142 145 16 | SPAN 16 EXPERIMENT:current_exp 106 28 32 17 | SPAN 17 VALUE 106 33 38 18 | SPAN 18 VALUE 106 40 45 19 | SPAN 19 VALUE 106 47 52 20 | SPAN 20 VALUE 106 58 70 21 | SPAN 21 MATERIAL 106 75 80 22 | SPAN 22 MATERIAL 106 82 92 23 | SPAN 23 MATERIAL 106 94 104 24 | SPAN 24 MATERIAL 106 110 121 25 | SPAN 25 VALUE 106 140 146 26 | SPAN 26 MATERIAL 106 161 163 27 | SPAN 27 MATERIAL 111 36 49 28 | SPAN 28 MATERIAL 111 54 67 29 | SPAN 29 EXPERIMENT:current_exp 111 73 83 30 | SPAN 30 VALUE 111 90 95 31 | SPAN 31 VALUE 111 100 112 32 | SPAN 32 MATERIAL 116 30 40 33 | SPAN 33 MATERIAL 116 42 52 34 | SPAN 34 MATERIAL 116 58 69 35 | SPAN 35 EXPERIMENT:current_exp 116 70 74 36 | SPAN 36 VALUE 116 75 80 37 | SPAN 37 VALUE 116 82 87 38 | SPAN 38 VALUE 116 93 104 39 | SPAN 39 VALUE 116 123 129 40 | SPAN 40 MATERIAL 116 133 135 41 | SPAN 41 MATERIAL 130 32 53 42 | SPAN 42 DEVICE 130 54 58 43 | SPAN 43 MATERIAL 130 66 77 44 | SPAN 44 EXPERIMENT:current_exp 130 84 91 45 | SPAN 45 VALUE 130 92 104 46 | SPAN 46 MATERIAL 130 119 121 47 | SPAN 47 VALUE 130 125 131 48 | EXPERIMENT 1 5 49 | device 3 50 | anode_material 4 51 | EXPERIMENT 2 6 52 | anode_material 7 53 | EXPERIMENT 3 13 54 | device 8 55 | anode_material 9 56 | interlayer_material 10 57 | electrolyte_material 11 58 | fuel_used 14 59 | fuel_used 15 60 | EXPERIMENT 4 16 61 | power_density 17 62 | power_density 18 63 | power_density 19 64 | power_density 20 65 | anode_material 21 66 | anode_material 22 67 | anode_material 23 68 | anode_material 24 69 | working_temperature 25 70 | fuel_used 26 71 | EXPERIMENT 5 29 72 | anode_material 27 73 | anode_material 28 
74 | power_density 30 75 | power_density 31 76 | EXPERIMENT 6 35 77 | anode_material 32 78 | anode_material 33 79 | anode_material 34 80 | resistance 36 81 | resistance 37 82 | resistance 38 83 | working_temperature 39 84 | fuel_used 40 85 | EXPERIMENT 7 44 86 | support_material 41 87 | device 42 88 | anode_material 43 89 | power_density 45 90 | fuel_used 46 91 | working_temperature 47 92 | LINK experiment_variation 6 5 93 | LINK same_experiment 5 13 94 | LINK same_experiment 13 16 95 | LINK coreference 1 2 96 | LINK coreference 13 12 97 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6249295.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 1055 2 | 2 0 1056 1268 3 | 3 0 1269 1472 4 | 4 1 1473 1729 5 | 5 1 1730 1895 6 | 6 1 1896 2081 7 | 7 0 2082 2485 8 | 8 0 2486 2664 9 | 9 0 2665 2820 10 | 10 0 2821 2974 11 | 11 0 2975 3111 12 | 12 0 3112 3370 13 | 13 0 3371 3498 14 | 14 0 3499 3622 15 | 15 0 3623 3779 16 | 16 0 3780 3946 17 | 17 0 3947 4159 18 | 18 0 4160 4488 19 | 19 0 4489 4659 20 | 20 0 4660 4821 21 | 21 0 4822 5026 22 | 22 0 5027 5292 23 | 23 0 5293 5369 24 | 24 0 5370 5600 25 | 25 0 5601 5735 26 | 26 0 5736 5859 27 | 27 0 5860 6027 28 | 28 0 6028 6168 29 | 29 0 6169 6290 30 | 30 0 6291 6416 31 | 31 0 6417 6519 32 | 32 0 6520 6714 33 | 33 0 6715 6872 34 | 34 0 6873 7079 35 | 35 1 7080 7303 36 | 36 0 7304 7414 37 | 37 0 7415 7556 38 | 38 0 7557 7646 39 | 39 0 7647 7762 40 | 40 0 7763 7892 41 | 41 0 7893 7953 42 | 42 0 7954 8056 43 | 43 0 8057 8218 44 | 44 0 8219 8458 45 | 45 0 8459 8600 46 | 46 0 8601 8774 47 | 47 0 8775 8917 48 | 48 0 8918 9078 49 | 49 0 9079 9255 50 | 50 0 9256 9381 51 | 51 0 9382 9501 52 | 52 0 9502 9614 53 | 53 0 9615 9723 54 | 54 0 9724 9852 55 | 55 0 9853 9922 56 | 56 0 9923 10138 57 | 57 0 10139 10209 58 | 58 0 10210 10284 59 | 59 0 10285 10441 60 | 60 0 10442 10621 61 | 61 0 10622 10803 62 | 62 0 10804 10807 63 | 
63 0 10808 10945 64 | 64 0 10946 11038 65 | 65 0 11039 11042 66 | 66 0 11043 11155 67 | 67 0 11156 11363 68 | 68 0 11364 11564 69 | 69 0 11565 11626 70 | 70 0 11627 11694 71 | 71 1 11695 11856 72 | 72 1 11857 12063 73 | 73 0 12064 12169 74 | 74 0 12170 12328 75 | 75 0 12329 12459 76 | 76 1 12460 12574 77 | 77 0 12575 12697 78 | 78 0 12698 12702 79 | 79 0 12703 13059 80 | 80 0 13060 13139 81 | 81 1 13140 13294 82 | 82 1 13295 13564 83 | 83 0 13565 13725 84 | 84 0 13726 13840 85 | 85 0 13841 13934 86 | 86 0 13935 14106 87 | 87 1 14107 14338 88 | 88 0 14339 14540 89 | 89 0 14541 14674 90 | 90 0 14675 14797 91 | 91 1 14798 14965 92 | 92 0 14966 15116 93 | 93 0 15117 15254 94 | 94 0 15255 15350 95 | 95 0 15351 15465 96 | 96 0 15466 15687 97 | 97 0 15688 15834 98 | 98 1 15835 15897 99 | 99 1 15898 16068 100 | 100 1 16069 16216 101 | 101 0 16217 16347 102 | 102 0 16348 16484 103 | 103 0 16485 16616 104 | 104 0 16617 16740 105 | 105 0 16741 16824 106 | 106 0 16825 17126 107 | 107 0 17127 17254 108 | 108 0 17255 17347 109 | 109 0 17348 17402 110 | 110 0 17403 17634 111 | 111 0 17635 17773 112 | 112 0 17774 17902 113 | 113 0 17903 17951 114 | 114 0 17952 18089 115 | 115 0 18090 18235 116 | 116 0 18236 18408 117 | 117 0 18409 18529 118 | 118 0 18530 18673 119 | 119 0 18674 18781 120 | 120 0 18782 18969 121 | 121 0 18970 19078 122 | 122 0 19079 19296 123 | 123 0 19297 19490 124 | 124 0 19491 19604 125 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5457052.csv: -------------------------------------------------------------------------------- 1 | 1 1 0 1236 2 | 2 1 1237 1432 3 | 3 0 1433 1640 4 | 4 0 1641 1766 5 | 5 0 1767 2023 6 | 6 0 2024 2144 7 | 7 0 2145 2297 8 | 8 0 2298 2485 9 | 9 0 2486 2638 10 | 10 0 2639 2786 11 | 11 0 2787 2902 12 | 12 0 2903 3031 13 | 13 0 3032 3169 14 | 14 0 3170 3373 15 | 15 0 3374 3567 16 | 16 0 3568 3741 17 | 17 0 3742 3957 18 | 18 0 3958 4101 19 | 19 0 4102 4314 20 | 20 
0 4315 4459 21 | 21 0 4460 4655 22 | 22 0 4656 4791 23 | 23 0 4792 4819 24 | 24 0 4820 4941 25 | 25 0 4942 4974 26 | 26 0 4975 5202 27 | 27 1 5203 5459 28 | 28 1 5460 5771 29 | 29 0 5772 5888 30 | 30 0 5889 6001 31 | 31 1 6002 6148 32 | 32 0 6149 6225 33 | 33 1 6226 6538 34 | 34 0 6539 6863 35 | 35 0 6864 6998 36 | 36 0 6999 7111 37 | 37 0 7112 7289 38 | 38 0 7290 7445 39 | 39 0 7446 7570 40 | 40 1 7571 7844 41 | 41 1 7845 8336 42 | 42 0 8337 8443 43 | 43 0 8444 8496 44 | 44 0 8497 8677 45 | 45 0 8678 8892 46 | 46 0 8893 9053 47 | 47 0 9054 9215 48 | 48 0 9216 9403 49 | 49 1 9404 9492 50 | 50 0 9493 9692 51 | 51 0 9693 9813 52 | 52 0 9814 9966 53 | 53 0 9967 10093 54 | 54 0 10094 10402 55 | 55 0 10403 10479 56 | 56 0 10480 10572 57 | 57 1 10573 10712 58 | 58 0 10713 10897 59 | 59 0 10898 10950 60 | 60 0 10951 11124 61 | 61 0 11125 11218 62 | 62 0 11219 11312 63 | 63 0 11313 11454 64 | 64 0 11455 11534 65 | 65 0 11535 11679 66 | 66 0 11680 11879 67 | 67 0 11880 12015 68 | 68 0 12016 12240 69 | 69 0 12241 12437 70 | 70 0 12438 12655 71 | 71 0 12656 12835 72 | 72 0 12836 12862 73 | 73 0 12863 12948 74 | 74 0 12949 13099 75 | 75 0 13100 13249 76 | 76 0 13250 13427 77 | 77 0 13428 13679 78 | 78 0 13680 13794 79 | 79 0 13795 13846 80 | 80 0 13847 14048 81 | 81 0 14049 14301 82 | 82 0 14302 14444 83 | 83 0 14445 14566 84 | 84 0 14567 14776 85 | 85 0 14777 14889 86 | 86 0 14890 15152 87 | 87 0 15153 15310 88 | 88 0 15311 15477 89 | 89 0 15478 15662 90 | 90 0 15663 15852 91 | 91 0 15853 15910 92 | 92 0 15911 16200 93 | 93 0 16201 16284 94 | 94 0 16285 16472 95 | 95 0 16473 16647 96 | 96 0 16648 16899 97 | 97 0 16900 17050 98 | 98 0 17051 17250 99 | 99 0 17251 17381 100 | 100 0 17382 17655 101 | 101 0 17656 17785 102 | 102 0 17786 17964 103 | 103 0 17965 18120 104 | 104 0 18121 18230 105 | 105 0 18231 18407 106 | 106 0 18408 18515 107 | 107 0 18516 18661 108 | 108 0 18662 18739 109 | 109 0 18740 18838 110 | 110 0 18839 18924 111 | 111 0 18925 19069 112 | 112 0 19070 19192 
class Attention(nn.Module):
    """
    Additive attention pooling over a padded sequence.

    The unnormalised attention score of every time step is
        a = T' . tanh(Ux)
    where x is the input vector at that step. Scores are softmax-normalised
    over the sequence (ignoring masked positions) and used to compute a
    weighted average of the input vectors.
    """

    def __init__(self, input_size, attn_size):
        """
        :param input_size: size of the last dimension of the input vectors
        :param attn_size: size of the intermediate attention projection
        """
        super(Attention, self).__init__()
        self.input_size = input_size
        self.attn_size = attn_size
        self.ulinear = nn.Linear(input_size, attn_size)
        self.tlinear = nn.Linear(attn_size, 1)
        self.init_weights()

    def init_weights(self):
        """Initialise the projection weights (biases keep nn.Linear's default init)."""
        nn.init.normal_(self.ulinear.weight, std=0.001)
        # use zero to give uniform attention at the beginning
        nn.init.zeros_(self.tlinear.weight)

    def forward(self, x, x_mask):
        """
        x      : batch_size * seq_len * input_size
        x_mask : batch_size * seq_len bool tensor; True if the position is
                 masked (padding), False if it is a real token

        returns (outputs, weights) where
        outputs : batch_size * input_size, attention-weighted average of x
        weights : batch_size * seq_len, attention distribution per sequence

        Note: a row of x_mask that is entirely True yields NaN weights for
        that sequence (softmax over -inf only).
        """
        # nn.Linear operates on the last dimension, so no flattening is needed.
        scores = self.tlinear(torch.tanh(self.ulinear(x))).squeeze(-1)

        # mask padding (out-of-place, so autograd tracks the operation)
        scores = scores.masked_fill(x_mask, float('-inf'))
        weights = F.softmax(scores, dim=1)
        # weighted average input vectors
        outputs = weights.unsqueeze(1).bmm(x).squeeze(1)

        return outputs, weights
class Encoder(nn.Module):
    """
    Sequence encoder: a stacked embedding layer followed by a bidirectional LSTM.
    """

    def __init__(self, weights_matrix, embedding_options, options, device):
        """

        :param weights_matrix: numpy matrix with word embeddings (not read by this constructor)
        :param embedding_options: list of dictionaries, one per embedding; the keys
            'embedding_type', 'weights' and "name" are read here
        :param options: python dictionary with hyperparameters
            ('hidden_size' and 'num_layers' are read here)
        :param device: device the pre-trained embedding weights are moved to
        """

        super().__init__()

        self.hidden_size = options['hidden_size']
        self.device = device

        embedding_types = [entry['embedding_type'] for entry in embedding_options]

        # One item per non-BERT embedding: a float tensor on the target device,
        # or None when that embedding comes without pre-trained weights.
        pretrained_embeddings = []
        for entry in embedding_options:
            if entry["name"] == "bert":
                continue
            if entry['weights'] is None:
                pretrained_embeddings.append(None)
            else:
                weight_tensor = torch.from_numpy(entry['weights']).float()
                pretrained_embeddings.append(weight_tensor.to(self.device))

        self.embeddingLayer = StackedEmbeddings(embedding_types, pretrained_embeddings,
                                                embedding_options, options, device)

        lstm_input_dim = self.embeddingLayer.get_output_dim()
        self.lstm = nn.LSTM(lstm_input_dim,
                            self.hidden_size,
                            num_layers=options['num_layers'],
                            batch_first=True,
                            bidirectional=True)

    def forward(self, tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths):
        """
        Embed the batch and run it through the BiLSTM.

        :param lengths: tensor with the number of tokens of every sequence in the batch
        :return: tuple (padded LSTM outputs with the batch dimension first,
            sequence lengths as reported by pad_packed_sequence)
        """
        embedded = self.embeddingLayer(tokens, subtokens, bertTensor, bert_subtoken_mask, token_lengths)

        # pack_padded_sequence is given the lengths on the CPU
        packed_input = pack_padded_sequence(embedded, lengths.cpu(),
                                            batch_first=True, enforce_sorted=False)

        # the final hidden / cell states are not needed
        packed_output, _ = self.lstm(packed_input)
        padded_output, padded_lengths = pad_packed_sequence(packed_output, batch_first=True)

        return padded_output, padded_lengths
80 0 14442 14552 81 | 81 0 14553 14681 82 | 82 0 14682 14881 83 | 83 0 14882 15042 84 | 84 0 15043 15134 85 | 85 0 15135 15310 86 | 86 0 15311 15632 87 | 87 0 15633 15923 88 | 88 0 15924 16073 89 | 89 0 16074 16272 90 | 90 0 16273 16350 91 | 91 0 16351 16490 92 | 92 0 16491 16557 93 | 93 0 16558 16600 94 | 94 0 16601 16662 95 | 95 0 16663 16832 96 | 96 0 16833 16960 97 | 97 0 16961 17132 98 | 98 0 17133 17233 99 | 99 0 17234 17383 100 | 100 0 17384 17608 101 | 101 0 17609 17676 102 | 102 0 17677 17810 103 | 103 0 17811 18113 104 | 104 0 18114 18174 105 | 105 0 18175 18288 106 | 106 0 18289 18416 107 | 107 0 18417 18489 108 | 108 0 18490 18795 109 | 109 0 18796 18839 110 | 110 0 18840 19010 111 | 111 0 19011 19148 112 | 112 0 19149 19393 113 | 113 1 19394 19693 114 | 114 0 19694 19904 115 | 115 0 19905 20065 116 | 116 0 20066 20163 117 | 117 0 20164 20221 118 | 118 1 20222 20420 119 | 119 1 20421 20618 120 | 120 1 20619 20776 121 | 121 1 20777 21001 122 | 122 0 21002 21264 123 | 123 0 21265 21431 124 | 124 0 21432 21547 125 | 125 1 21548 21737 126 | 126 1 21738 21841 127 | 127 0 21842 22070 128 | 128 0 22071 22206 129 | 129 0 22207 22478 130 | 130 0 22479 22790 131 | 131 0 22791 22959 132 | 132 0 22960 23229 133 | 133 0 23230 23318 134 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC3564701.csv: -------------------------------------------------------------------------------- 1 | 1 1 0 1007 2 | 2 0 1008 1141 3 | 3 0 1142 1273 4 | 4 1 1274 1492 5 | 5 1 1493 1806 6 | 6 0 1807 1960 7 | 7 0 1961 2126 8 | 8 0 2127 2221 9 | 9 0 2222 2347 10 | 10 0 2348 2455 11 | 11 1 2456 2713 12 | 12 0 2714 2811 13 | 13 0 2812 2970 14 | 14 0 2971 3193 15 | 15 1 3194 3354 16 | 16 1 3355 3501 17 | 17 0 3502 3694 18 | 18 0 3695 3808 19 | 19 1 3809 4103 20 | 20 1 4104 4278 21 | 21 0 4279 4451 22 | 22 0 4452 4642 23 | 23 1 4643 4807 24 | 24 0 4808 5092 25 | 25 1 5093 5318 26 | 26 0 5319 5496 27 | 27 0 5497 5602 28 | 
28 0 5603 5898 29 | 29 0 5899 6128 30 | 30 0 6129 6251 31 | 31 0 6252 6342 32 | 32 0 6343 6482 33 | 33 0 6483 6636 34 | 34 0 6637 6694 35 | 35 0 6695 6740 36 | 36 0 6741 6812 37 | 37 0 6813 6849 38 | 38 0 6850 6876 39 | 39 0 6877 6989 40 | 40 0 6990 7221 41 | 41 0 7222 7308 42 | 42 0 7309 7412 43 | 43 0 7413 7510 44 | 44 0 7511 7679 45 | 45 0 7680 7781 46 | 46 0 7782 7962 47 | 47 0 7963 8090 48 | 48 0 8091 8264 49 | 49 0 8265 8343 50 | 50 0 8344 8544 51 | 51 0 8545 8682 52 | 52 0 8683 8814 53 | 53 0 8815 8975 54 | 54 0 8976 9086 55 | 55 0 9087 9158 56 | 56 0 9159 9329 57 | 57 0 9330 9442 58 | 58 0 9443 9614 59 | 59 0 9615 9774 60 | 60 0 9775 9966 61 | 61 0 9967 10165 62 | 62 0 10166 10306 63 | 63 0 10307 10396 64 | 64 0 10397 10486 65 | 65 0 10487 10572 66 | 66 0 10573 10623 67 | 67 0 10624 10752 68 | 68 0 10753 10925 69 | 69 0 10926 11139 70 | 70 0 11140 11238 71 | 71 0 11239 11390 72 | 72 0 11391 11686 73 | 73 0 11687 11760 74 | 74 0 11761 11841 75 | 75 0 11842 11938 76 | 76 0 11939 12012 77 | 77 0 12013 12120 78 | 78 0 12121 12217 79 | 79 0 12218 12532 80 | 80 0 12533 12690 81 | 81 0 12691 12842 82 | 82 0 12843 12918 83 | 83 0 12919 13108 84 | 84 0 13109 13287 85 | 85 0 13288 13335 86 | 86 0 13336 13394 87 | 87 0 13395 13524 88 | 88 1 13525 13781 89 | 89 0 13782 13894 90 | 90 1 13895 14006 91 | 91 0 14007 14125 92 | 92 0 14126 14238 93 | 93 1 14239 14357 94 | 94 0 14358 14555 95 | 95 1 14556 14803 96 | 96 1 14804 14952 97 | 97 0 14953 15193 98 | 98 0 15194 15275 99 | 99 0 15276 15463 100 | 100 1 15464 15717 101 | 101 0 15718 15774 102 | 102 0 15775 15888 103 | 103 0 15889 15978 104 | 104 0 15979 16034 105 | 105 0 16035 16169 106 | 106 1 16170 16335 107 | 107 0 16336 16451 108 | 108 0 16452 16572 109 | 109 0 16573 16672 110 | 110 0 16673 16955 111 | 111 0 16956 17128 112 | 112 0 17129 17234 113 | 113 0 17235 17369 114 | 114 0 17370 17471 115 | 115 0 17472 17652 116 | 116 0 17653 17767 117 | 117 0 17768 17951 118 | 118 1 17952 18128 119 | 119 1 18129 18281 120 | 
120 1 18282 18493 121 | 121 0 18494 18591 122 | 122 0 18592 18762 123 | 123 0 18763 19046 124 | 124 0 19047 19125 125 | 125 0 19126 19243 126 | 126 0 19244 19297 127 | 127 0 19298 19372 128 | 128 0 19373 19432 129 | 129 0 19433 19495 130 | 130 0 19496 19547 131 | 131 0 19548 19680 132 | 132 0 19681 19801 133 | 133 0 19802 19890 134 | 134 0 19891 19991 135 | -------------------------------------------------------------------------------- /code/source/model/sentenceClassifier.py: -------------------------------------------------------------------------------- 1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020). 2 | Copyright (c) 2020 Robert Bosch GmbH 3 | @author: Heike Adel 4 | @author: Annemarie Friedrich 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU Affero General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU Affero General Public License for more details. 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 
class SentenceClassifier(nn.Module):
    """
    bilstm model for experiment sentence classification

    The forward pass runs the encoder, applies the attention layer with a
    padding mask, batch-normalizes the result and projects it to
    ``num_labels`` scores with a linear layer.
    """
    def __init__(self, weights_matrix, embedding_options, options, device):
        """

        :param weights_matrix: numpy matrix with word embeddings
        :param embedding_options: passed through unchanged to the Encoder
        :param options: python dictionary with hyperparameters; this class reads
            'hidden_size', 'num_labels' and 'attention_size'
        :param device: device on which the padding mask is created in forward()
        """

        super().__init__()

        self.hidden_size = options['hidden_size']
        self.num_labels = options['num_labels']
        self.device = device
        self.options = options

        self.encoder = Encoder(weights_matrix, embedding_options, options, device)

        # Attention, batch norm and the linear layer all work on vectors of
        # size 2 * hidden_size.
        # NOTE(review): presumably because the encoder is bidirectional - confirm in Encoder.
        representation_size = 2 * self.hidden_size

        self.attention_size = options['attention_size']
        self.attention = Attention(self.hidden_size*2, self.attention_size)
        self.batchnorm3 = nn.BatchNorm1d(self.hidden_size*2)

        self.linear = nn.Linear(representation_size, self.num_labels)

    @staticmethod
    def _padding_mask(lengths, max_length, device):
        """
        Build a boolean padding mask without a Python loop over the batch.

        :param lengths: sequence lengths, one per batch element
        :param max_length: number of positions per sequence
        :param device: device on which the mask is created
        :return: bool tensor of shape (len(lengths), max_length); an entry is True
            where the position index is >= the sequence length, False otherwise
        """
        lengths = torch.as_tensor(lengths, device=device).reshape(-1, 1)
        positions = torch.arange(max_length, device=device).unsqueeze(0)
        return positions >= lengths

    def forward(self, tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths, return_weights=False):
        """
        :param tokens: passed through unchanged to the encoder
        :param subtokens: passed through unchanged to the encoder
        :param bertTensor: passed through unchanged to the encoder
        :param bert_subtoken_mask: passed through unchanged to the encoder
        :param lengths: per-sentence lengths; passed to the encoder and used to build the padding mask
        :param token_lengths: passed through unchanged to the encoder
        :param return_weights: if True, additionally return the attention weights
        :return: output of the final linear layer, or the tuple (output, attention_weights)
            if return_weights is True
        """
        # The encoder returns a second value that this model does not use.
        outputs, _ = self.encoder(tokens, subtokens, bertTensor, bert_subtoken_mask, lengths, token_lengths)

        # The second dimension of the encoder output is used as the mask width.
        masks = self._padding_mask(lengths, outputs.shape[1], self.device)

        hidden, attention_weights = self.attention(outputs, masks)
        hidden = self.batchnorm3(hidden)

        output = self.linear(hidden)

        if return_weights:
            return output, attention_weights
        return output
/sofc-exp-corpus/annotations/frames/PMC6632008.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 EXPERIMENT:current_exp 1 1174 1182 2 | SPAN 2 EXPERIMENT:current_exp 1 1187 1195 3 | SPAN 3 DEVICE 1 1225 1246 4 | SPAN 4 DEVICE 1 1248 1252 5 | SPAN 5 MATERIAL 1 1263 1281 6 | SPAN 6 EXPERIMENT:current_exp 2 18 26 7 | SPAN 7 MATERIAL 2 39 65 8 | SPAN 8 MATERIAL 2 67 72 9 | SPAN 9 VALUE 5 34 45 10 | SPAN 10 EXPERIMENT:current_exp 5 49 57 11 | SPAN 11 VALUE 5 61 67 12 | SPAN 12 MATERIAL 6 40 44 13 | SPAN 13 EXPERIMENT:current_exp 6 57 68 14 | SPAN 14 EXPERIMENT:current_exp 54 28 37 15 | SPAN 15 VALUE 54 83 94 16 | SPAN 16 VALUE 54 98 101 17 | SPAN 17 VALUE 54 106 112 18 | SPAN 18 VALUE 54 117 121 19 | SPAN 19 MATERIAL 85 57 83 20 | SPAN 20 MATERIAL 85 100 126 21 | SPAN 21 MATERIAL 85 128 132 22 | SPAN 22 MATERIAL 89 135 172 23 | SPAN 23 EXPERIMENT:current_exp 102 32 44 24 | SPAN 24 VALUE 102 102 115 25 | SPAN 25 MATERIAL 103 4 6 26 | SPAN 26 MATERIAL 103 7 33 27 | SPAN 27 MATERIAL 103 34 38 28 | SPAN 28 MATERIAL 103 39 65 29 | SPAN 29 MATERIAL 116 23 49 30 | SPAN 30 MATERIAL 118 19 22 31 | SPAN 31 EXPERIMENT:current_exp 118 44 46 32 | SPAN 32 VALUE 118 52 63 33 | SPAN 33 EXPERIMENT:current_exp 118 87 89 34 | SPAN 34 VALUE 118 90 101 35 | SPAN 35 MATERIAL 118 120 128 36 | SPAN 36 EXPERIMENT:current_exp 118 150 152 37 | SPAN 37 VALUE 118 153 164 38 | SPAN 38 EXPERIMENT:current_exp 118 187 189 39 | SPAN 39 VALUE 118 190 201 40 | SPAN 40 VALUE 118 211 217 41 | SPAN 41 VALUE 121 13 24 42 | SPAN 42 EXPERIMENT:current_exp 121 25 35 43 | SPAN 43 VALUE 121 52 78 44 | SPAN 44 VALUE 121 91 102 45 | SPAN 45 DEVICE 122 77 81 46 | SPAN 46 MATERIAL 123 31 57 47 | SPAN 47 DEVICE 127 29 33 48 | SPAN 48 VALUE 127 57 60 49 | SPAN 49 EXPERIMENT:current_exp 127 61 69 50 | SPAN 50 VALUE 127 105 116 51 | SPAN 51 VALUE 127 123 127 52 | SPAN 52 VALUE 127 132 136 53 | SPAN 53 VALUE 127 140 146 54 | SPAN 54 VALUE 127 151 157 55 | EXPERIMENT 1 2 56 | 
device 3 57 | cathode_material 5 58 | EXPERIMENT 2 6 59 | cathode_material 7 60 | EXPERIMENT 3 10 61 | conductivity 9 62 | working_temperature 11 63 | EXPERIMENT 4 13 64 | electrolyte_material 12 65 | EXPERIMENT 5 14 66 | current_density 15 67 | working_temperature 16 68 | working_temperature 17 69 | time_of_operation 18 70 | EXPERIMENT 6 23 71 | working_temperature 24 72 | EXPERIMENT 7 31 73 | cathode_material 30 74 | resistance 32 75 | EXPERIMENT 8 33 76 | power_density 34 77 | EXPERIMENT 9 36 78 | cathode_material 35 79 | resistance 37 80 | EXPERIMENT 10 38 81 | power_density 39 82 | working_temperature 40 83 | EXPERIMENT 11 42 84 | resistance 41 85 | resistance 43 86 | resistance 44 87 | EXPERIMENT 12 49 88 | device 47 89 | time_of_operation 48 90 | time_of_operation 51 91 | time_of_operation 52 92 | working_temperature 53 93 | working_temperature 54 94 | LINK same_experiment 1 2 95 | LINK same_experiment 2 6 96 | LINK experiment_variation 36 33 97 | LINK same_experiment 31 33 98 | LINK same_experiment 36 38 99 | LINK coreference 3 4 100 | LINK coreference 7 8 101 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6632008.csv: -------------------------------------------------------------------------------- 1 | 1 1 0 1293 2 | 2 1 1294 1480 3 | 3 0 1481 1633 4 | 4 0 1634 1749 5 | 5 1 1750 1818 6 | 6 1 1819 1973 7 | 7 0 1974 2111 8 | 8 0 2112 2168 9 | 9 0 2169 2413 10 | 10 0 2414 2655 11 | 11 0 2656 2799 12 | 12 0 2800 3006 13 | 13 0 3007 3161 14 | 14 0 3162 3364 15 | 15 0 3365 3627 16 | 16 0 3628 3707 17 | 17 0 3708 3948 18 | 18 0 3949 4091 19 | 19 0 4092 4256 20 | 20 0 4257 4499 21 | 21 0 4500 4749 22 | 22 0 4750 4898 23 | 23 0 4899 5148 24 | 24 0 5149 5290 25 | 25 0 5291 5578 26 | 26 0 5579 5596 27 | 27 0 5597 5738 28 | 28 0 5739 5977 29 | 29 0 5978 6133 30 | 30 0 6134 6199 31 | 31 0 6200 6296 32 | 32 0 6297 6434 33 | 33 0 6435 6642 34 | 34 0 6643 6702 35 | 35 0 6703 6818 36 | 36 0 6819 
7020 37 | 37 0 7021 7092 38 | 38 0 7093 7167 39 | 39 0 7168 7275 40 | 40 0 7276 7363 41 | 41 0 7364 7628 42 | 42 0 7629 7715 43 | 43 0 7716 8084 44 | 44 0 8085 8456 45 | 45 0 8457 8880 46 | 46 0 8881 8989 47 | 47 0 8990 9077 48 | 48 0 9078 9239 49 | 49 0 9240 9390 50 | 50 0 9391 9602 51 | 51 0 9603 9740 52 | 52 0 9741 9904 53 | 53 0 9905 10006 54 | 54 1 10007 10129 55 | 55 0 10130 10231 56 | 56 0 10232 10324 57 | 57 0 10325 10456 58 | 58 0 10457 10484 59 | 59 0 10485 10598 60 | 60 0 10599 10716 61 | 61 0 10717 10921 62 | 62 0 10922 11151 63 | 63 0 11152 11298 64 | 64 0 11299 11527 65 | 65 0 11528 11782 66 | 66 0 11783 11853 67 | 67 0 11854 12003 68 | 68 0 12004 12130 69 | 69 0 12131 12240 70 | 70 0 12241 12403 71 | 71 0 12404 12542 72 | 72 0 12543 12704 73 | 73 0 12705 12828 74 | 74 0 12829 13035 75 | 75 0 13036 13114 76 | 76 0 13115 13161 77 | 77 0 13162 13352 78 | 78 0 13353 13606 79 | 79 0 13607 13816 80 | 80 0 13817 13945 81 | 81 0 13946 14152 82 | 82 0 14153 14226 83 | 83 0 14227 14448 84 | 84 0 14449 14662 85 | 85 0 14663 14947 86 | 86 0 14948 15093 87 | 87 0 15094 15166 88 | 88 0 15167 15319 89 | 89 0 15320 15578 90 | 90 0 15579 15647 91 | 91 0 15648 15782 92 | 92 0 15783 15849 93 | 93 0 15850 15949 94 | 94 0 15950 16168 95 | 95 0 16169 16460 96 | 96 0 16461 16987 97 | 97 0 16988 17074 98 | 98 0 17075 17266 99 | 99 0 17267 17403 100 | 100 0 17404 17528 101 | 101 0 17529 17694 102 | 102 1 17695 17811 103 | 103 0 17812 17941 104 | 104 0 17942 18094 105 | 105 0 18095 18486 106 | 106 0 18487 18811 107 | 107 0 18812 18997 108 | 108 0 18998 19143 109 | 109 0 19144 19286 110 | 110 0 19287 19368 111 | 111 0 19369 19586 112 | 112 0 19587 19731 113 | 113 0 19732 19780 114 | 114 0 19781 19933 115 | 115 0 19934 20034 116 | 116 0 20035 20129 117 | 117 0 20130 20281 118 | 118 1 20282 20500 119 | 119 0 20501 20606 120 | 120 0 20607 20763 121 | 121 1 20764 21094 122 | 122 0 21095 21215 123 | 123 0 21216 21373 124 | 124 0 21374 21586 125 | 125 0 21587 21747 126 | 126 0 21748 
class WeightedDownSampler(Sampler):
    """
    Sampler that randomly drops a configurable fraction of the instances of each class.

    A new random subset is drawn every time an iterator is created.
    """

    def __init__(self, dataset, class_idx, class_weights, class_key=None):
        """
        :param dataset: The data set to be sampled from. Each instance must support
        ``inst[class_idx]`` (or ``inst[class_key]``) returning an object with ``.item()``.
        :param class_idx: position of the class label within an instance (used if no class_key is given)
        :param class_weights: dictionary with downsampling weights for the classes, 0.0 means "keep all", 0.3 means
        "keep 70% of this class". Classes without an entry are kept completely.
        :param class_key: if given, use this instead of class index (not tested yet!!) --> for pytorch_all data structures

        Instantiate only once (when creating the Dataset instance).
        In DataLoader, call only the iterator: when instantiating the iterator, the sampling really happens.
        """
        # collect information about where instances of each class are in dataset
        self.class_weights = class_weights
        self.indices_by_class = defaultdict(list)
        # a (truthy) class_key takes precedence over class_idx
        label_pos = class_key if class_key else class_idx
        for i, inst in enumerate(dataset):
            label = int(inst[label_pos].item())  # assume integer coding for classes
            self.indices_by_class[label].append(i)
        # The length is computed with exactly the same per-class rounding that __iter__ uses,
        # so len(sampler) always equals the number of indices produced.
        self.num_samples = sum(self._num_kept(c) for c in self.indices_by_class)

    def _num_kept(self, label):
        """
        :return: number of instances of class ``label`` that survive downsampling
        """
        total = len(self.indices_by_class[label])
        # classes without a configured weight are not downsampled
        drop_fraction = self.class_weights.get(label, 0.0)
        # clamp so that weights outside [0, 1] cannot produce negative or oversized slices
        return min(total, max(0, int(total * (1 - drop_fraction))))

    def __iter__(self):
        """
        :return: an iterator over the indices of the items, which will be used sequentially by DataLoader
        to split the data into batches.
        """
        # Now actually downsample the data
        indices = []
        for c, members in self.indices_by_class.items():
            class_indices = list(members)  # copy, so the stored order is left untouched
            random.shuffle(class_indices)
            indices += class_indices[:self._num_kept(c)]
        random.shuffle(indices)
        return iter(indices)

    def __len__(self):
        return self.num_samples
71 | 71 0 10320 10407 72 | 72 0 10408 10621 73 | 73 1 10622 10753 74 | 74 0 10754 10876 75 | 75 0 10877 11119 76 | 76 0 11120 11332 77 | 77 1 11333 11476 78 | 78 1 11477 11582 79 | 79 0 11583 11753 80 | 80 0 11754 11876 81 | 81 1 11877 12075 82 | 82 0 12076 12323 83 | 83 0 12324 12535 84 | 84 1 12536 12714 85 | 85 0 12715 12927 86 | 86 0 12928 13079 87 | 87 1 13080 13300 88 | 88 1 13301 13433 89 | 89 0 13434 13618 90 | 90 0 13619 13864 91 | 91 0 13865 13945 92 | 92 0 13946 14071 93 | 93 0 14072 14160 94 | 94 0 14161 14334 95 | 95 0 14335 14574 96 | 96 0 14575 14675 97 | 97 0 14676 14935 98 | 98 1 14936 15142 99 | 99 0 15143 15341 100 | 100 0 15342 15439 101 | 101 1 15440 15533 102 | 102 0 15534 15694 103 | 103 0 15695 15914 104 | 104 0 15915 15975 105 | 105 0 15976 16078 106 | 106 0 16079 16187 107 | 107 0 16188 16444 108 | 108 0 16445 16602 109 | 109 0 16603 17046 110 | 110 0 17047 17142 111 | 111 0 17143 17250 112 | 112 0 17251 17408 113 | 113 0 17409 17511 114 | 114 0 17512 17669 115 | 115 0 17670 17786 116 | 116 0 17787 17890 117 | 117 0 17891 18032 118 | 118 0 18033 18185 119 | 119 0 18186 18285 120 | 120 0 18286 18289 121 | 121 0 18290 18407 122 | 122 0 18408 18457 123 | 123 0 18458 18575 124 | 124 0 18576 18658 125 | 125 0 18659 18881 126 | 126 0 18882 18993 127 | 127 0 18994 19144 128 | 128 1 19145 19439 129 | 129 0 19440 19627 130 | 130 0 19628 19716 131 | 131 0 19717 19899 132 | 132 0 19900 20022 133 | 133 0 20023 20188 134 | 134 0 20189 20346 135 | 135 0 20347 20529 136 | 136 0 20530 20657 137 | 137 0 20658 20739 138 | 138 1 20740 20854 139 | 139 0 20855 20955 140 | 140 0 20956 21109 141 | 141 1 21110 21256 142 | 142 0 21257 21396 143 | 143 0 21397 21513 144 | 144 0 21514 21570 145 | 145 0 21571 21580 146 | 146 0 21581 21699 147 | 147 0 21700 21704 148 | 148 0 21705 21750 149 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5331335.csv: 
-------------------------------------------------------------------------------- 1 | 1 0 0 1113 2 | 2 0 1114 1295 3 | 3 0 1296 1511 4 | 4 1 1512 1944 5 | 5 0 1945 2082 6 | 6 1 2083 2254 7 | 7 0 2255 2474 8 | 8 1 2475 2829 9 | 9 0 2830 2998 10 | 10 1 2999 3184 11 | 11 1 3185 3396 12 | 12 0 3397 3560 13 | 13 1 3561 3737 14 | 14 0 3738 3843 15 | 15 0 3844 4013 16 | 16 0 4014 4195 17 | 17 0 4196 4418 18 | 18 0 4419 4682 19 | 19 0 4683 4921 20 | 20 0 4922 5059 21 | 21 0 5060 5225 22 | 22 0 5226 5500 23 | 23 1 5501 5752 24 | 24 0 5753 5905 25 | 25 1 5906 6143 26 | 26 1 6144 6290 27 | 27 0 6291 6403 28 | 28 0 6404 6711 29 | 29 0 6712 7003 30 | 30 0 7004 7308 31 | 31 0 7309 7487 32 | 32 0 7488 7580 33 | 33 0 7581 7822 34 | 34 0 7823 7921 35 | 35 1 7922 8101 36 | 36 0 8102 8466 37 | 37 0 8467 8727 38 | 38 0 8728 8890 39 | 39 0 8891 9109 40 | 40 0 9110 9191 41 | 41 0 9192 9356 42 | 42 0 9357 9433 43 | 43 0 9434 9611 44 | 44 0 9612 9846 45 | 45 0 9847 9969 46 | 46 0 9970 10080 47 | 47 0 10081 10252 48 | 48 0 10253 10590 49 | 49 0 10591 10716 50 | 50 0 10717 10887 51 | 51 0 10888 10978 52 | 52 0 10979 11249 53 | 53 0 11250 11355 54 | 54 1 11356 11532 55 | 55 0 11533 11749 56 | 56 1 11750 11930 57 | 57 1 11931 12123 58 | 58 1 12124 12305 59 | 59 1 12306 12504 60 | 60 0 12505 12582 61 | 61 0 12583 12701 62 | 62 0 12702 12902 63 | 63 0 12903 13084 64 | 64 0 13085 13279 65 | 65 0 13280 13477 66 | 66 1 13478 13590 67 | 67 0 13591 13754 68 | 68 0 13755 13925 69 | 69 0 13926 14068 70 | 70 0 14069 14231 71 | 71 0 14232 14433 72 | 72 0 14434 14750 73 | 73 0 14751 14907 74 | 74 0 14908 15140 75 | 75 0 15141 15244 76 | 76 0 15245 15334 77 | 77 0 15335 15435 78 | 78 0 15436 15558 79 | 79 0 15559 15785 80 | 80 0 15786 15975 81 | 81 0 15976 16148 82 | 82 0 16149 16470 83 | 83 0 16471 16619 84 | 84 0 16620 16880 85 | 85 0 16881 17082 86 | 86 0 17083 17374 87 | 87 0 17375 17667 88 | 88 0 17668 17834 89 | 89 0 17835 18004 90 | 90 0 18005 18143 91 | 91 0 18144 18237 92 | 92 0 18238 18352 93 | 
93 0 18353 18599 94 | 94 1 18600 18765 95 | 95 1 18766 19013 96 | 96 1 19014 19235 97 | 97 0 19236 19348 98 | 98 0 19349 19475 99 | 99 0 19476 19626 100 | 100 0 19627 19716 101 | 101 0 19717 19930 102 | 102 0 19931 20093 103 | 103 0 20094 20275 104 | 104 0 20276 20403 105 | 105 0 20404 20541 106 | 106 0 20542 20719 107 | 107 0 20720 20804 108 | 108 0 20805 20948 109 | 109 0 20949 21104 110 | 110 0 21105 21486 111 | 111 1 21487 21672 112 | 112 0 21673 21852 113 | 113 0 21853 21987 114 | 114 0 21988 22188 115 | 115 0 22189 22379 116 | 116 1 22380 22592 117 | 117 1 22593 22818 118 | 118 0 22819 23109 119 | 119 0 23110 23295 120 | 120 0 23296 23431 121 | 121 0 23432 23535 122 | 122 0 23536 23694 123 | 123 0 23695 23855 124 | 124 0 23856 23978 125 | 125 0 23979 24094 126 | 126 0 24095 24203 127 | 127 0 24204 24342 128 | 128 0 24343 24490 129 | 129 0 24491 24576 130 | 130 0 24577 24802 131 | 131 0 24803 24964 132 | 132 0 24965 25150 133 | 133 0 25151 25295 134 | 134 0 25296 25413 135 | 135 0 25414 25492 136 | 136 0 25493 25739 137 | 137 0 25740 25863 138 | 138 0 25864 26015 139 | 139 0 26016 26094 140 | 140 0 26095 26263 141 | 141 0 26264 26332 142 | 142 0 26333 26396 143 | 143 0 26397 26503 144 | 144 0 26504 26508 145 | 145 0 26509 26558 146 | 146 0 26559 26695 147 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5700654.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 924 2 | 2 0 925 927 3 | 3 0 928 1384 4 | 4 0 1385 1387 5 | 5 0 1388 1659 6 | 6 0 1660 1830 7 | 7 0 1831 1975 8 | 8 0 1976 2090 9 | 9 0 2091 2248 10 | 10 0 2249 2442 11 | 11 0 2443 2678 12 | 12 0 2679 2858 13 | 13 0 2859 3536 14 | 14 0 3537 3546 15 | 15 0 3547 3556 16 | 16 0 3557 3564 17 | 17 0 3565 3572 18 | 18 0 3573 3583 19 | 19 0 3584 3586 20 | 20 0 3587 3589 21 | 21 0 3590 3601 22 | 22 0 3602 3611 23 | 23 0 3612 3614 24 | 24 0 3615 3626 25 | 25 0 3627 3827 26 | 26 0 3828 4207 27 | 
27 0 4208 4527 28 | 28 0 4528 4650 29 | 29 0 4651 5088 30 | 30 0 5089 5419 31 | 31 0 5420 5679 32 | 32 0 5680 5915 33 | 33 0 5916 6191 34 | 34 0 6192 6668 35 | 35 0 6669 7085 36 | 36 0 7086 7290 37 | 37 0 7291 7631 38 | 38 0 7632 8408 39 | 39 0 8409 8535 40 | 40 0 8536 8747 41 | 41 0 8748 8988 42 | 42 0 8989 9131 43 | 43 0 9132 9247 44 | 44 0 9248 9464 45 | 45 0 9465 9592 46 | 46 0 9593 9749 47 | 47 0 9750 9940 48 | 48 0 9941 10096 49 | 49 0 10097 10215 50 | 50 0 10216 10392 51 | 51 0 10393 10553 52 | 52 0 10554 10673 53 | 53 0 10674 10766 54 | 54 0 10767 10989 55 | 55 0 10990 11156 56 | 56 0 11157 11249 57 | 57 0 11250 11545 58 | 58 0 11546 11859 59 | 59 0 11860 11947 60 | 60 0 11948 12016 61 | 61 0 12017 12174 62 | 62 0 12175 12476 63 | 63 0 12477 12679 64 | 64 0 12680 12826 65 | 65 0 12827 12968 66 | 66 0 12969 13125 67 | 67 0 13126 13272 68 | 68 0 13273 13435 69 | 69 0 13436 13606 70 | 70 0 13607 13832 71 | 71 0 13833 13993 72 | 72 0 13994 14193 73 | 73 0 14194 14426 74 | 74 0 14427 14795 75 | 75 0 14796 14975 76 | 76 0 14976 15226 77 | 77 0 15227 15330 78 | 78 0 15331 15427 79 | 79 0 15428 15594 80 | 80 0 15595 15670 81 | 81 0 15671 15888 82 | 82 0 15889 15948 83 | 83 0 15949 16102 84 | 84 0 16103 16319 85 | 85 0 16320 16520 86 | 86 0 16521 16654 87 | 87 0 16655 16846 88 | 88 1 16847 17141 89 | 89 0 17142 17245 90 | 90 1 17246 17613 91 | 91 0 17614 17708 92 | 92 0 17709 17923 93 | 93 0 17924 18070 94 | 94 1 18071 18289 95 | 95 0 18290 18584 96 | 96 1 18585 18904 97 | 97 0 18905 19132 98 | 98 0 19133 19695 99 | 99 0 19696 20218 100 | 100 0 20219 20339 101 | 101 0 20340 20472 102 | 102 0 20473 20594 103 | 103 1 20595 20783 104 | 104 0 20784 20891 105 | 105 0 20892 21104 106 | 106 0 21105 21320 107 | 107 0 21321 21478 108 | 108 1 21479 21725 109 | 109 0 21726 21866 110 | 110 0 21867 22325 111 | 111 0 22326 22501 112 | 112 0 22502 22705 113 | 113 0 22706 22846 114 | 114 0 22847 23004 115 | 115 0 23005 23118 116 | 116 0 23119 23294 117 | 117 0 23295 23538 118 | 118 
0 23539 23684 119 | 119 0 23685 23847 120 | 120 0 23848 24178 121 | 121 0 24179 24321 122 | 122 0 24322 24429 123 | 123 0 24430 24600 124 | 124 0 24601 24760 125 | 125 0 24761 24876 126 | 126 0 24877 25115 127 | 127 0 25116 25539 128 | 128 0 25540 25974 129 | 129 0 25975 26076 130 | 130 0 26077 26232 131 | 131 0 26233 26375 132 | 132 0 26376 26509 133 | 133 0 26510 26598 134 | 134 0 26599 26769 135 | 135 0 26770 26946 136 | 136 0 26947 27149 137 | 137 0 27150 27221 138 | 138 0 27222 27307 139 | 139 0 27308 27411 140 | 140 0 27412 27890 141 | 141 0 27891 28050 142 | 142 0 28051 28149 143 | 143 0 28150 28337 144 | 144 0 28338 28544 145 | 145 0 28545 28702 146 | 146 0 28703 28788 147 | 147 0 28789 28854 148 | 148 0 28855 28928 149 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC4992832.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 788 2 | 2 1 789 1009 3 | 3 0 1010 1146 4 | 4 0 1147 1328 5 | 5 1 1329 1570 6 | 6 0 1571 1708 7 | 7 1 1709 1854 8 | 8 1 1855 2035 9 | 9 1 2036 2167 10 | 10 0 2168 2334 11 | 11 0 2335 2573 12 | 12 0 2574 2646 13 | 13 0 2647 2772 14 | 14 1 2773 3223 15 | 15 0 3224 3301 16 | 16 0 3302 3396 17 | 17 0 3397 3515 18 | 18 0 3516 3622 19 | 19 0 3623 3708 20 | 20 1 3709 3868 21 | 21 0 3869 3994 22 | 22 0 3995 4091 23 | 23 1 4092 4219 24 | 24 1 4220 4341 25 | 25 1 4342 4534 26 | 26 1 4535 4614 27 | 27 1 4615 4800 28 | 28 0 4801 4939 29 | 29 1 4940 5081 30 | 30 0 5082 5278 31 | 31 1 5279 5457 32 | 32 0 5458 5680 33 | 33 0 5681 5753 34 | 34 0 5754 5960 35 | 35 0 5961 6157 36 | 36 0 6158 6337 37 | 37 0 6338 6562 38 | 38 0 6563 6662 39 | 39 0 6663 6768 40 | 40 0 6769 6994 41 | 41 0 6995 7142 42 | 42 0 7143 7253 43 | 43 0 7254 7506 44 | 44 0 7507 7685 45 | 45 0 7686 8015 46 | 46 0 8016 8239 47 | 47 0 8240 8490 48 | 48 0 8491 8735 49 | 49 0 8736 8929 50 | 50 0 8930 9158 51 | 51 0 9159 9375 52 | 52 0 9376 9669 53 | 53 0 9670 9884 54 | 
54 0 9885 10065 55 | 55 0 10066 10242 56 | 56 0 10243 10461 57 | 57 0 10462 10583 58 | 58 0 10584 10801 59 | 59 0 10802 10950 60 | 60 0 10951 11114 61 | 61 0 11115 11216 62 | 62 0 11217 11419 63 | 63 0 11420 11540 64 | 64 0 11541 11651 65 | 65 0 11652 11760 66 | 66 0 11761 12010 67 | 67 0 12011 12221 68 | 68 0 12222 12313 69 | 69 0 12314 12580 70 | 70 0 12581 12633 71 | 71 1 12634 12802 72 | 72 0 12803 12952 73 | 73 0 12953 13089 74 | 74 0 13090 13225 75 | 75 0 13226 13470 76 | 76 0 13471 13722 77 | 77 0 13723 13843 78 | 78 0 13844 14010 79 | 79 0 14011 14196 80 | 80 0 14197 14308 81 | 81 0 14309 14394 82 | 82 0 14395 14516 83 | 83 0 14517 14747 84 | 84 0 14748 14852 85 | 85 1 14853 15044 86 | 86 0 15045 15181 87 | 87 1 15182 15267 88 | 88 1 15268 15418 89 | 89 0 15419 15503 90 | 90 1 15504 15654 91 | 91 1 15655 15808 92 | 92 1 15809 15867 93 | 93 1 15868 16029 94 | 94 0 16030 16194 95 | 95 0 16195 16393 96 | 96 0 16394 16547 97 | 97 0 16548 16711 98 | 98 1 16712 16819 99 | 99 1 16820 17044 100 | 100 0 17045 17265 101 | 101 0 17266 17402 102 | 102 0 17403 17494 103 | 103 1 17495 17660 104 | 104 0 17661 17760 105 | 105 0 17761 18144 106 | 106 0 18145 18289 107 | 107 0 18290 18388 108 | 108 0 18389 18463 109 | 109 0 18464 18605 110 | 110 0 18606 18771 111 | 111 0 18772 18830 112 | 112 0 18831 18937 113 | 113 0 18938 19097 114 | 114 0 19098 19133 115 | 115 0 19134 19362 116 | 116 0 19363 19464 117 | 117 0 19465 19573 118 | 118 0 19574 19910 119 | 119 0 19911 20044 120 | 120 0 20045 20124 121 | 121 0 20125 20260 122 | 122 0 20261 20326 123 | 123 0 20327 20444 124 | 124 0 20445 20532 125 | 125 0 20533 20681 126 | 126 0 20682 20872 127 | 127 0 20873 21046 128 | 128 0 21047 21205 129 | 129 0 21206 21393 130 | 130 0 21394 21449 131 | 131 0 21450 21623 132 | 132 0 21624 21729 133 | 133 0 21730 21780 134 | 134 0 21781 22028 135 | 135 0 22029 22164 136 | 136 0 22165 22309 137 | 137 0 22310 22395 138 | 138 0 22396 22460 139 | 139 0 22461 22641 140 | 140 0 22642 22774 141 | 141 
0 22775 22896 142 | 142 0 22897 22993 143 | 143 0 22994 23076 144 | 144 0 23077 23175 145 | 145 0 23176 23350 146 | 146 0 23351 23474 147 | 147 0 23475 23538 148 | 148 0 23539 23667 149 | 149 0 23668 23672 150 | 150 0 23673 23718 151 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC5944822.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 DEVICE 133 191 213 2 | SPAN 2 DEVICE 133 215 220 3 | SPAN 3 DEVICE 135 0 22 4 | SPAN 4 DEVICE 135 23 28 5 | SPAN 5 VALUE 135 222 233 6 | SPAN 6 DEVICE 135 249 254 7 | SPAN 7 EXPERIMENT:general_info 136 32 39 8 | SPAN 8 DEVICE 136 78 83 9 | SPAN 9 DEVICE 137 163 168 10 | SPAN 10 DEVICE 138 146 151 11 | SPAN 11 DEVICE 138 211 216 12 | SPAN 12 EXPERIMENT:general_info 138 328 335 13 | SPAN 13 DEVICE 138 373 378 14 | SPAN 14 DEVICE 139 100 105 15 | SPAN 15 DEVICE 140 137 142 16 | SPAN 16 DEVICE 142 57 62 17 | SPAN 17 EXPERIMENT:general_info 142 63 71 18 | SPAN 18 MATERIAL 142 83 109 19 | SPAN 19 MATERIAL 142 113 137 20 | SPAN 20 MATERIAL 142 146 161 21 | SPAN 21 DEVICE 142 327 332 22 | SPAN 22 DEVICE 143 100 105 23 | SPAN 23 MATERIAL 144 133 168 24 | SPAN 24 DEVICE 144 209 214 25 | SPAN 25 DEVICE 144 371 375 26 | SPAN 26 EXPERIMENT:previous_work 144 376 386 27 | SPAN 27 MATERIAL 144 413 448 28 | SPAN 28 EXPERIMENT:previous_work 144 459 462 29 | SPAN 29 VALUE 144 463 476 30 | SPAN 30 VALUE 144 480 486 31 | SPAN 31 MATERIAL 144 508 543 32 | SPAN 32 EXPERIMENT:previous_work 144 562 568 33 | SPAN 33 VALUE 144 599 612 34 | SPAN 34 VALUE 144 616 622 35 | SPAN 35 MATERIAL 145 191 223 36 | SPAN 36 MATERIAL 145 225 232 37 | SPAN 37 MATERIAL 145 244 250 38 | SPAN 38 DEVICE 146 19 29 39 | SPAN 39 MATERIAL 146 47 54 40 | SPAN 40 MATERIAL 149 132 154 41 | SPAN 41 EXPERIMENT:previous_work 149 185 197 42 | SPAN 42 MATERIAL 149 214 234 43 | SPAN 43 MATERIAL 149 236 239 44 | SPAN 44 MATERIAL 149 241 256 45 | SPAN 45 DEVICE 149 257 265 
46 | SPAN 46 EXPERIMENT:previous_work 149 281 285 47 | SPAN 47 MATERIAL 149 321 347 48 | SPAN 48 EXPERIMENT:previous_work 149 356 365 49 | SPAN 49 MATERIAL 149 434 437 50 | SPAN 50 MATERIAL 149 466 492 51 | SPAN 51 EXPERIMENT:previous_work 150 12 20 52 | SPAN 52 EXPERIMENT:previous_work 150 154 162 53 | SPAN 53 MATERIAL 150 169 185 54 | SPAN 54 MATERIAL 150 187 190 55 | SPAN 55 EXPERIMENT:general_info 153 112 124 56 | SPAN 56 DEVICE 153 128 133 57 | SPAN 57 EXPERIMENT:previous_work 153 300 308 58 | SPAN 58 MATERIAL 153 344 351 59 | SPAN 59 MATERIAL 153 353 381 60 | SPAN 60 MATERIAL 153 394 401 61 | EXPERIMENT 1 7 62 | device 8 63 | EXPERIMENT 2 12 64 | device 13 65 | EXPERIMENT 3 17 66 | device 16 67 | cathode_material 18 68 | cathode_material 19 69 | cathode_material 20 70 | EXPERIMENT 4 26 71 | device 25 72 | cathode_material 27 73 | EXPERIMENT 5 28 74 | resistance 29 75 | EXPERIMENT 6 32 76 | cathode_material 31 77 | resistance 33 78 | working_temperature 34 79 | EXPERIMENT 7 41 80 | cathode_material 40 81 | electrolyte_material 42 82 | support_material 44 83 | device 45 84 | EXPERIMENT 8 46 85 | cathode_material 47 86 | EXPERIMENT 9 48 87 | electrolyte_material 49 88 | cathode_material 50 89 | EXPERIMENT 10 52 90 | cathode_material 53 91 | EXPERIMENT 11 55 92 | device 56 93 | EXPERIMENT 12 57 94 | cathode_material 58 95 | cathode_material 59 96 | anode_material 60 97 | LINK experiment_variation 32 28 98 | LINK same_experiment 26 28 99 | LINK same_experiment 41 46 100 | LINK same_experiment 46 48 101 | LINK same_experiment 51 52 102 | LINK coreference 1 2 103 | LINK coreference 3 4 104 | LINK coreference 24 25 105 | LINK coreference 35 36 106 | LINK coreference 42 43 107 | LINK coreference 53 54 108 | -------------------------------------------------------------------------------- /code/source/evaluation/ir_stats.py: -------------------------------------------------------------------------------- 1 | """ Neural models for information extraction tasks related to 
def compute_eval_scores(conf_matrix, labels, verbose=True):
    """
    Compute accuracy and per-class / macro-averaged precision, recall and F1 from a confusion matrix.

    All scores are percentages, i.e. values between 0 and 100.

    :param conf_matrix: num_classes x num_classes confusion matrix, first dimension is the gold standard label,
    second dimension is the predicted label
    :param labels: list of labels, indices correspond to the indices in the confusion matrix
    :param verbose: if True (default), print all scores to stdout
    :return: accuracy, macro p, r, f, dictionaries with p, r, f by class
    """
    num_classes = len(labels)
    conf = np.array(conf_matrix)

    p = {}  # precision by class
    r = {}  # recall by class
    f1 = {}  # F1 by class

    for i, cat in enumerate(labels):
        correct = conf[i, i]
        total_gold = conf[i].sum()
        total_pred = conf[:, i].sum()
        # Definition: if this class is never predicted, P = 100% (scores are on a 0-100 scale)
        p[cat] = float(correct / total_pred * 100) if total_pred else 100.0
        # Definition: if this class never occurs in the gold standard, R = 0
        r[cat] = float(correct / total_gold * 100) if total_gold else 0.0
        # Definition: if P=0 and R=0 then F1 = 0
        denominator = p[cat] + r[cat]
        f1[cat] = 2 * p[cat] * r[cat] / denominator if denominator else 0.0

    # macro-averages (each class weighted equally); 0 if there are no classes at all
    macro_p = sum(p.values()) / num_classes if num_classes else 0.0
    macro_r = sum(r.values()) / num_classes if num_classes else 0.0
    # macro-avg. F1 is the harmonic mean of macro-p and macro-r (0 if both are 0)
    macro_sum = macro_p + macro_r
    macro_f1 = 2 * macro_p * macro_r / macro_sum if macro_sum else 0.0

    # accuracy (0 for a confusion matrix without any instances)
    correct = sum(conf[i, i] for i in range(num_classes))
    total = sum(conf[i].sum() for i in range(num_classes))
    accuracy = float(correct / total * 100) if total else 0.0

    if verbose:
        print("Overall accuracy: {:.1f}".format(accuracy))
        print("Macro-avg. precision: {:.1f}".format(macro_p))
        print("Macro-avg. recall: {:.1f}".format(macro_r))
        print("Macro-avg. F1: {:.1f}".format(macro_f1))
        print("Class statistics (P/R/F):")
        for cat in labels:
            print("{:20s} {:.1f} {:.1f} {:.1f}".format(str(cat), p[cat], r[cat], f1[cat]))

    # return all the results
    return accuracy, macro_p, macro_r, macro_f1, p, r, f1
8 11 24 | SPAN 24 EXPERIMENT:current_exp 25 17 24 25 | SPAN 25 MATERIAL 25 63 66 26 | SPAN 26 EXPERIMENT:current_exp 25 76 88 27 | SPAN 27 MATERIAL 30 70 73 28 | SPAN 28 MATERIAL 30 103 106 29 | SPAN 29 MATERIAL 30 111 114 30 | SPAN 30 EXPERIMENT:current_exp 30 121 133 31 | SPAN 31 MATERIAL 42 114 117 32 | SPAN 32 VALUE 42 163 176 33 | SPAN 33 VALUE 42 180 186 34 | SPAN 34 MATERIAL 42 190 193 35 | SPAN 35 EXPERIMENT:current_exp 42 265 275 36 | SPAN 36 MATERIAL 58 202 205 37 | SPAN 37 MATERIAL 58 210 213 38 | SPAN 38 EXPERIMENT:current_exp 58 214 216 39 | SPAN 39 VALUE 58 229 240 40 | SPAN 40 VALUE 58 244 255 41 | SPAN 41 MATERIAL 59 0 3 42 | SPAN 42 EXPERIMENT:current_exp 59 21 23 43 | SPAN 43 VALUE 59 87 97 44 | SPAN 44 MATERIAL 62 36 39 45 | SPAN 45 MATERIAL 62 44 47 46 | SPAN 46 EXPERIMENT:current_exp 62 72 74 47 | SPAN 47 MATERIAL 71 197 200 48 | SPAN 48 EXPERIMENT:current_exp 71 223 231 49 | SPAN 49 DEVICE 71 245 252 50 | SPAN 50 EXPERIMENT:current_exp 71 284 289 51 | SPAN 51 VALUE 71 296 306 52 | SPAN 52 MATERIAL 71 322 324 53 | SPAN 53 VALUE 71 329 339 54 | SPAN 54 MATERIAL 71 356 358 55 | SPAN 55 VALUE 71 381 387 56 | SPAN 56 VALUE 72 96 109 57 | SPAN 57 MATERIAL 72 113 116 58 | SPAN 58 MATERIAL 72 121 124 59 | SPAN 59 VALUE 117 85 97 60 | SPAN 60 MATERIAL 117 107 110 61 | SPAN 61 VALUE 117 114 122 62 | EXPERIMENT 1 7 63 | cathode_material 1 64 | cathode_material 3 65 | cathode_material 5 66 | fuel_used 8 67 | fuel_used 9 68 | EXPERIMENT 2 10 69 | anode_material 11 70 | cathode_material 12 71 | EXPERIMENT 3 15 72 | anode_material 14 73 | EXPERIMENT 4 24 74 | anode_material 22 75 | anode_material 23 76 | EXPERIMENT 5 26 77 | cathode_material 25 78 | EXPERIMENT 6 30 79 | cathode_material 27 80 | anode_material 28 81 | anode_material 29 82 | EXPERIMENT 7 35 83 | cathode_material 31 84 | resistance 32 85 | working_temperature 33 86 | fuel_used 34 87 | EXPERIMENT 8 38 88 | anode_material 36 89 | anode_material 37 90 | conductivity 39 91 | working_temperature 40 
92 | EXPERIMENT 9 42 93 | cathode_material 41 94 | conductivity 43 95 | EXPERIMENT 10 46 96 | cathode_material 44 97 | cathode_material 45 98 | EXPERIMENT 11 48 99 | anode_material 47 100 | device 49 101 | EXPERIMENT 12 50 102 | resistance 51 103 | fuel_used 52 104 | resistance 53 105 | fuel_used 54 106 | working_temperature 55 107 | LINK experiment_variation 26 24 108 | LINK same_experiment 48 50 109 | LINK coreference 1 2 110 | LINK coreference 3 4 111 | LINK coreference 5 6 112 | LINK coreference 12 13 113 | LINK coreference 16 17 114 | LINK coreference 18 19 115 | LINK coreference 20 21 116 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC4663492.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 EXPERIMENT:current_exp 4 50 53 2 | SPAN 2 VALUE 4 54 64 3 | SPAN 3 EXPERIMENT:current_exp 4 72 74 4 | SPAN 4 MATERIAL 4 110 117 5 | SPAN 5 VALUE 4 158 168 6 | SPAN 6 VALUE 4 173 179 7 | SPAN 7 MATERIAL 7 27 56 8 | SPAN 8 MATERIAL 7 58 64 9 | SPAN 9 EXPERIMENT:general_info 7 80 89 10 | SPAN 10 DEVICE 7 156 178 11 | SPAN 11 DEVICE 7 180 185 12 | SPAN 12 EXPERIMENT:general_info 7 225 227 13 | SPAN 13 MATERIAL 7 261 296 14 | SPAN 14 MATERIAL 7 298 304 15 | SPAN 15 VALUE 7 346 358 16 | SPAN 16 MATERIAL 8 33 36 17 | SPAN 17 MATERIAL 8 38 53 18 | SPAN 18 EXPERIMENT:previous_work 8 64 72 19 | SPAN 19 VALUE 8 93 97 20 | SPAN 20 VALUE 8 99 104 21 | SPAN 21 VALUE 8 110 122 22 | SPAN 22 VALUE 8 126 129 23 | SPAN 23 VALUE 8 131 134 24 | SPAN 24 VALUE 8 140 146 25 | SPAN 25 EXPERIMENT:current_exp 21 19 22 26 | SPAN 26 VALUE 21 65 75 27 | SPAN 27 MATERIAL 21 86 94 28 | SPAN 28 VALUE 21 118 123 29 | SPAN 29 MATERIAL 21 124 127 30 | SPAN 30 MATERIAL 41 29 44 31 | SPAN 31 DEVICE 41 73 102 32 | SPAN 32 EXPERIMENT:current_exp 41 117 125 33 | SPAN 33 VALUE 41 189 211 34 | SPAN 34 EXPERIMENT:current_exp 43 21 29 35 | SPAN 35 VALUE 43 38 44 36 | SPAN 36 
EXPERIMENT:current_exp 45 28 32 37 | SPAN 37 VALUE 45 33 35 38 | SPAN 38 VALUE 45 37 39 39 | SPAN 39 VALUE 45 41 43 40 | SPAN 40 VALUE 45 45 48 41 | SPAN 41 VALUE 45 54 65 42 | SPAN 42 VALUE 45 69 72 43 | SPAN 43 VALUE 45 74 77 44 | SPAN 44 VALUE 45 79 82 45 | SPAN 45 VALUE 45 84 87 46 | SPAN 46 VALUE 45 93 99 47 | SPAN 47 VALUE 46 38 44 48 | SPAN 48 EXPERIMENT:current_exp 46 49 57 49 | SPAN 49 MATERIAL 46 73 80 50 | SPAN 50 MATERIAL 47 17 32 51 | SPAN 51 EXPERIMENT:current_exp 47 46 49 52 | SPAN 52 EXPERIMENT:current_exp 47 77 80 53 | SPAN 53 MATERIAL 47 105 121 54 | SPAN 54 MATERIAL 85 21 36 55 | SPAN 55 EXPERIMENT:current_exp 85 50 52 56 | SPAN 56 DEVICE 85 115 120 57 | EXPERIMENT 1 1 58 | power_density 2 59 | EXPERIMENT 2 3 60 | anode_material 4 61 | power_density 5 62 | working_temperature 6 63 | EXPERIMENT 3 9 64 | anode_material 7 65 | device 10 66 | EXPERIMENT 4 12 67 | anode_material 13 68 | working_temperature 15 69 | EXPERIMENT 5 18 70 | electrolyte_material 17 71 | conductivity 19 72 | conductivity 20 73 | conductivity 21 74 | working_temperature 22 75 | working_temperature 23 76 | working_temperature 24 77 | EXPERIMENT 6 25 78 | anode_material 27 79 | anode_material 29 80 | EXPERIMENT 7 32 81 | anode_material 30 82 | device 31 83 | working_temperature 33 84 | EXPERIMENT 8 34 85 | working_temperature 35 86 | EXPERIMENT 9 36 87 | power_density 37 88 | power_density 38 89 | power_density 39 90 | power_density 40 91 | power_density 41 92 | working_temperature 42 93 | working_temperature 43 94 | working_temperature 44 95 | working_temperature 45 96 | working_temperature 46 97 | EXPERIMENT 10 48 98 | working_temperature 47 99 | anode_material 49 100 | EXPERIMENT 11 51 101 | anode_material 50 102 | EXPERIMENT 12 52 103 | anode_material 53 104 | EXPERIMENT 13 55 105 | anode_material 54 106 | device 56 107 | LINK experiment_variation 3 1 108 | LINK experiment_variation 12 9 109 | LINK same_experiment 32 34 110 | LINK experiment_variation 48 36 111 | LINK 
same_experiment 34 36 112 | LINK experiment_variation 51 48 113 | LINK experiment_variation 52 51 114 | LINK coreference 7 8 115 | LINK coreference 10 11 116 | LINK coreference 13 14 117 | LINK coreference 17 16 118 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5793538.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 1106 2 | 2 1 1107 1320 3 | 3 1 1321 1497 4 | 4 1 1498 1786 5 | 5 0 1787 1847 6 | 6 1 1848 2055 7 | 7 1 2056 2289 8 | 8 0 2290 2645 9 | 9 0 2646 2849 10 | 10 0 2850 3130 11 | 11 1 3131 3467 12 | 12 1 3468 3759 13 | 13 0 3760 3987 14 | 14 0 3988 4318 15 | 15 1 4319 4517 16 | 16 1 4518 4698 17 | 17 1 4699 4887 18 | 18 1 4888 5003 19 | 19 1 5004 5138 20 | 20 1 5139 5358 21 | 21 1 5359 5476 22 | 22 1 5477 5719 23 | 23 1 5720 6019 24 | 24 0 6020 6308 25 | 25 1 6309 6691 26 | 26 0 6692 6982 27 | 27 0 6983 7149 28 | 28 0 7150 7305 29 | 29 0 7306 7430 30 | 30 0 7431 7564 31 | 31 0 7565 7671 32 | 32 0 7672 7833 33 | 33 0 7834 8003 34 | 34 1 8004 8171 35 | 35 1 8172 8258 36 | 36 1 8259 8351 37 | 37 0 8352 8466 38 | 38 0 8467 8566 39 | 39 0 8567 8743 40 | 40 0 8744 8854 41 | 41 0 8855 8996 42 | 42 0 8997 9161 43 | 43 0 9162 9280 44 | 44 0 9281 9397 45 | 45 0 9398 9506 46 | 46 0 9507 9815 47 | 47 0 9816 9920 48 | 48 0 9921 10084 49 | 49 0 10085 10253 50 | 50 0 10254 10485 51 | 51 0 10486 10866 52 | 52 0 10867 11061 53 | 53 0 11062 11231 54 | 54 0 11232 11502 55 | 55 0 11503 11530 56 | 56 0 11531 11658 57 | 57 0 11659 11925 58 | 58 0 11926 12182 59 | 59 0 12183 12461 60 | 60 0 12462 12634 61 | 61 0 12635 12811 62 | 62 0 12812 12934 63 | 63 0 12935 13028 64 | 64 0 13029 13110 65 | 65 0 13111 13437 66 | 66 0 13438 13581 67 | 67 0 13582 13778 68 | 68 0 13779 14015 69 | 69 0 14016 14235 70 | 70 0 14236 14383 71 | 71 0 14384 14565 72 | 72 0 14566 14617 73 | 73 0 14618 14781 74 | 74 0 14782 14962 75 | 75 0 14963 15336 76 | 76 0 15337 15512 77 | 77 
0 15513 15768 78 | 78 0 15769 15882 79 | 79 0 15883 16008 80 | 80 0 16009 16298 81 | 81 0 16299 16509 82 | 82 1 16510 16705 83 | 83 1 16706 16820 84 | 84 0 16821 16952 85 | 85 1 16953 17181 86 | 86 1 17182 17346 87 | 87 0 17347 17424 88 | 88 0 17425 17651 89 | 89 1 17652 17847 90 | 90 1 17848 18003 91 | 91 1 18004 18254 92 | 92 0 18255 18393 93 | 93 0 18394 18645 94 | 94 0 18646 18889 95 | 95 0 18890 19086 96 | 96 0 19087 19229 97 | 97 0 19230 19484 98 | 98 0 19485 19650 99 | 99 0 19651 19819 100 | 100 0 19820 20295 101 | 101 0 20296 20428 102 | 102 1 20429 20524 103 | 103 0 20525 20750 104 | 104 1 20751 20891 105 | 105 0 20892 21139 106 | 106 0 21140 21333 107 | 107 0 21334 21492 108 | 108 0 21493 21773 109 | 109 1 21774 21977 110 | 110 0 21978 22268 111 | 111 0 22269 22391 112 | 112 0 22392 22509 113 | 113 0 22510 22671 114 | 114 0 22672 22780 115 | 115 0 22781 22936 116 | 116 0 22937 23067 117 | 117 1 23068 23204 118 | 118 0 23205 23281 119 | 119 1 23282 23386 120 | 120 0 23387 23632 121 | 121 0 23633 23840 122 | 122 0 23841 24013 123 | 123 0 24014 24102 124 | 124 0 24103 24170 125 | 125 1 24171 24339 126 | 126 0 24340 24557 127 | 127 1 24558 24658 128 | 128 0 24659 24958 129 | 129 0 24959 25116 130 | 130 0 25117 25519 131 | 131 0 25520 25646 132 | 132 0 25647 25733 133 | 133 1 25734 25853 134 | 134 1 25854 25989 135 | 135 0 25990 26096 136 | 136 0 26097 26288 137 | 137 0 26289 26466 138 | 138 1 26467 26646 139 | 139 0 26647 26847 140 | 140 0 26848 26976 141 | 141 1 26977 27057 142 | 142 0 27058 27200 143 | 143 1 27201 27467 144 | 144 1 27468 27616 145 | 145 0 27617 27732 146 | 146 0 27733 28011 147 | 147 0 28012 28132 148 | 148 0 28133 28313 149 | 149 1 28314 28478 150 | 150 1 28479 28647 151 | 151 1 28648 28952 152 | 152 0 28953 29027 153 | 153 1 29028 29204 154 | 154 0 29205 29346 155 | 155 0 29347 29468 156 | 156 0 29469 29623 157 | 157 0 29624 29809 158 | -------------------------------------------------------------------------------- 
/sofc-exp-corpus/annotations/sentences/PMC4673446.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 906 2 | 2 0 907 1257 3 | 3 1 1258 1518 4 | 4 1 1519 1695 5 | 5 1 1696 1784 6 | 6 0 1785 1878 7 | 7 0 1879 2052 8 | 8 0 2053 2313 9 | 9 0 2314 2470 10 | 10 0 2471 2660 11 | 11 0 2661 2796 12 | 12 1 2797 2960 13 | 13 1 2961 3296 14 | 14 1 3297 3470 15 | 15 1 3471 3663 16 | 16 0 3664 3847 17 | 17 1 3848 4122 18 | 18 0 4123 4179 19 | 19 1 4180 4417 20 | 20 0 4418 4589 21 | 21 1 4590 4889 22 | 22 0 4890 5035 23 | 23 0 5036 5211 24 | 24 1 5212 5411 25 | 25 0 5412 5591 26 | 26 1 5592 5761 27 | 27 1 5762 5967 28 | 28 0 5968 7035 29 | 29 0 7036 7254 30 | 30 0 7255 7416 31 | 31 0 7417 7687 32 | 32 0 7688 7849 33 | 33 0 7850 8034 34 | 34 0 8035 8231 35 | 35 0 8232 8420 36 | 36 0 8421 8543 37 | 37 0 8544 8747 38 | 38 0 8748 8946 39 | 39 0 8947 9057 40 | 40 0 9058 9329 41 | 41 0 9330 9462 42 | 42 0 9463 9701 43 | 43 0 9702 9839 44 | 44 0 9840 10022 45 | 45 0 10023 10206 46 | 46 0 10207 10375 47 | 47 1 10376 10481 48 | 48 0 10482 10678 49 | 49 0 10679 10987 50 | 50 0 10988 11108 51 | 51 0 11109 11370 52 | 52 1 11371 11545 53 | 53 0 11546 11710 54 | 54 0 11711 11885 55 | 55 0 11886 12069 56 | 56 1 12070 12430 57 | 57 1 12431 12713 58 | 58 1 12714 12898 59 | 59 1 12899 13290 60 | 60 1 13291 13501 61 | 61 1 13502 13627 62 | 62 1 13628 13761 63 | 63 1 13762 13933 64 | 64 0 13934 14089 65 | 65 1 14090 14287 66 | 66 1 14288 14592 67 | 67 1 14593 14794 68 | 68 1 14795 15072 69 | 69 1 15073 15210 70 | 70 1 15211 15285 71 | 71 0 15286 15379 72 | 72 1 15380 15503 73 | 73 1 15504 15792 74 | 74 0 15793 15989 75 | 75 1 15990 16181 76 | 76 1 16182 16321 77 | 77 1 16322 16679 78 | 78 1 16680 16839 79 | 79 1 16840 17019 80 | 80 1 17020 17228 81 | 81 1 17229 17431 82 | 82 1 17432 17547 83 | 83 1 17548 17698 84 | 84 1 17699 17866 85 | 85 1 17867 18020 86 | 86 1 18021 18223 87 | 87 1 18224 18395 88 | 88 1 18396 18545 89 | 89 1 18546 18683 90 | 90 1 18684 
18878 91 | 91 1 18879 19016 92 | 92 1 19017 19121 93 | 93 1 19122 19246 94 | 94 0 19247 19422 95 | 95 1 19423 19621 96 | 96 1 19622 19794 97 | 97 1 19795 19930 98 | 98 1 19931 20071 99 | 99 0 20072 20141 100 | 100 1 20142 20267 101 | 101 1 20268 20509 102 | 102 0 20510 20684 103 | 103 0 20685 21090 104 | 104 0 21091 21218 105 | 105 0 21219 21370 106 | 106 0 21371 21624 107 | 107 0 21625 21770 108 | 108 0 21771 21930 109 | 109 0 21931 22194 110 | 110 0 22195 22395 111 | 111 0 22396 22648 112 | 112 0 22649 22773 113 | 113 0 22774 22911 114 | 114 0 22912 23169 115 | 115 0 23170 23457 116 | 116 0 23458 23801 117 | 117 0 23802 23978 118 | 118 0 23979 24165 119 | 119 0 24166 24333 120 | 120 0 24334 24483 121 | 121 0 24484 24760 122 | 122 0 24761 25071 123 | 123 0 25072 25282 124 | 124 0 25283 25401 125 | 125 0 25402 25584 126 | 126 0 25585 25711 127 | 127 0 25712 25960 128 | 128 0 25961 26187 129 | 129 1 26188 26397 130 | 130 0 26398 26658 131 | 131 0 26659 26788 132 | 132 0 26789 27091 133 | 133 0 27092 27315 134 | 134 0 27316 27531 135 | 135 0 27532 27602 136 | 136 0 27603 27748 137 | 137 0 27749 27898 138 | 138 0 27899 28005 139 | 139 0 28006 28073 140 | 140 0 28074 28352 141 | 141 0 28353 28409 142 | 142 0 28410 28591 143 | 143 0 28592 28733 144 | 144 0 28734 28904 145 | 145 0 28905 29076 146 | 146 0 29077 29238 147 | 147 0 29239 29461 148 | 148 0 29462 29547 149 | 149 0 29548 29628 150 | 150 0 29629 29800 151 | 151 0 29801 29885 152 | 152 0 29886 30065 153 | 153 0 30066 30186 154 | 154 0 30187 30251 155 | 155 0 30252 30374 156 | 156 0 30375 30379 157 | 157 0 30380 30425 158 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5456869.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 257 2 | 2 1 258 751 3 | 3 0 752 880 4 | 4 1 881 1088 5 | 5 0 1089 1267 6 | 6 0 1268 1452 7 | 7 0 1453 1816 8 | 8 0 1817 2016 9 | 9 0 2017 2264 10 | 10 0 2265 2322 11 | 11 
0 2323 2433 12 | 12 0 2434 2748 13 | 13 0 2749 2858 14 | 14 0 2859 3019 15 | 15 1 3020 3190 16 | 16 1 3191 3431 17 | 17 0 3432 3546 18 | 18 0 3547 3740 19 | 19 0 3741 3950 20 | 20 0 3951 4191 21 | 21 0 4192 4443 22 | 22 0 4444 4633 23 | 23 0 4634 4740 24 | 24 0 4741 4920 25 | 25 1 4921 5170 26 | 26 0 5171 5366 27 | 27 0 5367 5531 28 | 28 0 5532 5653 29 | 29 0 5654 5822 30 | 30 0 5823 5937 31 | 31 0 5938 6077 32 | 32 0 6078 6214 33 | 33 0 6215 6504 34 | 34 0 6505 6570 35 | 35 0 6571 6632 36 | 36 0 6633 6981 37 | 37 0 6982 7060 38 | 38 0 7061 7296 39 | 39 0 7297 7375 40 | 40 0 7376 7432 41 | 41 0 7433 7584 42 | 42 0 7585 7666 43 | 43 0 7667 7856 44 | 44 0 7857 7987 45 | 45 0 7988 8092 46 | 46 0 8093 8189 47 | 47 0 8190 8384 48 | 48 0 8385 8500 49 | 49 0 8501 8737 50 | 50 0 8738 8866 51 | 51 0 8867 8998 52 | 52 0 8999 9145 53 | 53 0 9146 9249 54 | 54 0 9250 9460 55 | 55 0 9461 9597 56 | 56 0 9598 9775 57 | 57 0 9776 9868 58 | 58 0 9869 9985 59 | 59 0 9986 10078 60 | 60 0 10079 10179 61 | 61 0 10180 10229 62 | 62 0 10230 10331 63 | 63 0 10332 10498 64 | 64 0 10499 10669 65 | 65 0 10670 10852 66 | 66 0 10853 10963 67 | 67 0 10964 11139 68 | 68 0 11140 11166 69 | 69 0 11167 11269 70 | 70 0 11270 11353 71 | 71 0 11354 11404 72 | 72 0 11405 11487 73 | 73 0 11488 11577 74 | 74 0 11578 11690 75 | 75 0 11691 11739 76 | 76 0 11740 11999 77 | 77 0 12000 12069 78 | 78 0 12070 12256 79 | 79 0 12257 12373 80 | 80 0 12374 12571 81 | 81 0 12572 12771 82 | 82 0 12772 12850 83 | 83 0 12851 13020 84 | 84 0 13021 13111 85 | 85 0 13112 13493 86 | 86 0 13494 13633 87 | 87 0 13634 13873 88 | 88 0 13874 14105 89 | 89 0 14106 14232 90 | 90 0 14233 14278 91 | 91 0 14279 14365 92 | 92 0 14366 14501 93 | 93 0 14502 14642 94 | 94 0 14643 14734 95 | 95 0 14735 14850 96 | 96 0 14851 14942 97 | 97 0 14943 15063 98 | 98 0 15064 15245 99 | 99 0 15246 15347 100 | 100 0 15348 15495 101 | 101 0 15496 15764 102 | 102 0 15765 15863 103 | 103 0 15864 15983 104 | 104 0 15984 16085 105 | 105 0 16086 16227 
106 | 106 0 16228 16414 107 | 107 0 16415 16550 108 | 108 0 16551 16845 109 | 109 0 16846 17022 110 | 110 0 17023 17255 111 | 111 0 17256 17533 112 | 112 0 17534 17688 113 | 113 0 17689 17768 114 | 114 0 17769 17901 115 | 115 0 17902 18048 116 | 116 0 18049 18130 117 | 117 0 18131 18215 118 | 118 0 18216 18375 119 | 119 0 18376 18562 120 | 120 0 18563 18756 121 | 121 0 18757 18957 122 | 122 0 18958 19058 123 | 123 0 19059 19248 124 | 124 0 19249 19468 125 | 125 0 19469 19608 126 | 126 0 19609 19720 127 | 127 0 19721 19792 128 | 128 0 19793 19947 129 | 129 0 19948 20074 130 | 130 0 20075 20304 131 | 131 0 20305 20484 132 | 132 0 20485 20630 133 | 133 0 20631 20773 134 | 134 0 20774 20944 135 | 135 0 20945 21095 136 | 136 0 21096 21394 137 | 137 0 21395 21509 138 | 138 1 21510 21711 139 | 139 0 21712 21965 140 | 140 0 21966 22159 141 | 141 0 22160 22224 142 | 142 1 22225 22521 143 | 143 0 22522 22693 144 | 144 1 22694 22782 145 | 145 0 22783 22914 146 | 146 1 22915 23003 147 | 147 0 23004 23078 148 | 148 1 23079 23246 149 | 149 0 23247 23514 150 | 150 0 23515 23721 151 | 151 0 23722 24099 152 | 152 0 24100 24200 153 | 153 0 24201 24323 154 | 154 1 24324 24551 155 | 155 1 24552 24760 156 | 156 0 24761 24960 157 | 157 0 24961 25301 158 | 158 0 25302 25423 159 | 159 1 25424 25607 160 | 160 1 25608 25742 161 | 161 0 25743 26029 162 | 162 0 26030 26213 163 | 163 0 26214 26341 164 | 164 1 26342 26442 165 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6337513.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 1645 2 | 2 1 1646 1816 3 | 3 1 1817 2173 4 | 4 1 2174 2479 5 | 5 1 2480 2604 6 | 6 1 2605 2844 7 | 7 0 2845 2933 8 | 8 0 2934 3070 9 | 9 0 3071 3334 10 | 10 0 3335 3530 11 | 11 0 3531 3616 12 | 12 0 3617 3863 13 | 13 0 3864 4053 14 | 14 0 4054 4210 15 | 15 0 4211 4297 16 | 16 0 4298 4469 17 | 17 0 4470 4732 18 | 18 0 4733 4902 19 | 19 0 4903 5155 
20 | 20 1 5156 5263 21 | 21 0 5264 5538 22 | 22 0 5539 5769 23 | 23 0 5770 6005 24 | 24 0 6006 6212 25 | 25 0 6213 6434 26 | 26 0 6435 6461 27 | 27 0 6462 6852 28 | 28 0 6853 6990 29 | 29 0 6991 7211 30 | 30 0 7212 7333 31 | 31 0 7334 7590 32 | 32 0 7591 7712 33 | 33 0 7713 7871 34 | 34 0 7872 8125 35 | 35 0 8126 8298 36 | 36 0 8299 8510 37 | 37 0 8511 8683 38 | 38 0 8684 9003 39 | 39 0 9004 9232 40 | 40 0 9233 9398 41 | 41 0 9399 9511 42 | 42 0 9512 9664 43 | 43 0 9665 9950 44 | 44 0 9951 9978 45 | 45 0 9979 10127 46 | 46 0 10128 10532 47 | 47 0 10533 10694 48 | 48 0 10695 10943 49 | 49 0 10944 11286 50 | 50 0 11287 11458 51 | 51 0 11459 11570 52 | 52 0 11571 11729 53 | 53 0 11730 11890 54 | 54 0 11891 12111 55 | 55 0 12112 12354 56 | 56 0 12355 12469 57 | 57 0 12470 12649 58 | 58 0 12650 12858 59 | 59 0 12859 13167 60 | 60 0 13168 13336 61 | 61 0 13337 13656 62 | 62 0 13657 13820 63 | 63 0 13821 13963 64 | 64 0 13964 14071 65 | 65 1 14072 14223 66 | 66 1 14224 14354 67 | 67 0 14355 14586 68 | 68 0 14587 14733 69 | 69 1 14734 14986 70 | 70 0 14987 15166 71 | 71 0 15167 15237 72 | 72 0 15238 15379 73 | 73 0 15380 15483 74 | 74 0 15484 15761 75 | 75 0 15762 15935 76 | 76 0 15936 16051 77 | 77 1 16052 16192 78 | 78 1 16193 16468 79 | 79 0 16469 16670 80 | 80 0 16671 16853 81 | 81 0 16854 16988 82 | 82 0 16989 17110 83 | 83 0 17111 17270 84 | 84 0 17271 17479 85 | 85 0 17480 17735 86 | 86 0 17736 17894 87 | 87 0 17895 18133 88 | 88 1 18134 18391 89 | 89 0 18392 18587 90 | 90 1 18588 18792 91 | 91 0 18793 18923 92 | 92 0 18924 19103 93 | 93 0 19104 19265 94 | 94 0 19266 19640 95 | 95 0 19641 19748 96 | 96 0 19749 19973 97 | 97 0 19974 20268 98 | 98 0 20269 20551 99 | 99 0 20552 20612 100 | 100 0 20613 20713 101 | 101 0 20714 20966 102 | 102 0 20967 21218 103 | 103 0 21219 21467 104 | 104 0 21468 21602 105 | 105 0 21603 21859 106 | 106 0 21860 22023 107 | 107 0 22024 22160 108 | 108 0 22161 22314 109 | 109 0 22315 22500 110 | 110 0 22501 22823 111 | 111 0 22824 23008 
112 | 112 0 23009 23682 113 | 113 0 23683 23885 114 | 114 0 23886 24102 115 | 115 0 24103 24241 116 | 116 0 24242 24378 117 | 117 0 24379 24534 118 | 118 0 24535 24669 119 | 119 1 24670 24949 120 | 120 0 24950 25030 121 | 121 0 25031 25168 122 | 122 0 25169 25367 123 | 123 0 25368 25535 124 | 124 0 25536 25671 125 | 125 0 25672 26072 126 | 126 0 26073 26143 127 | 127 0 26144 26224 128 | 128 0 26225 26404 129 | 129 0 26405 26646 130 | 130 0 26647 26884 131 | 131 0 26885 27038 132 | 132 1 27039 27189 133 | 133 0 27190 27384 134 | 134 0 27385 27588 135 | 135 0 27589 27702 136 | 136 1 27703 27886 137 | 137 0 27887 28075 138 | 138 0 28076 28197 139 | 139 0 28198 28517 140 | 140 0 28518 28755 141 | 141 0 28756 28938 142 | 142 0 28939 29162 143 | 143 1 29163 29353 144 | 144 1 29354 29551 145 | 145 1 29552 29607 146 | 146 1 29608 29813 147 | 147 0 29814 29967 148 | 148 0 29968 30192 149 | 149 0 30193 30415 150 | 150 0 30416 30575 151 | 151 0 30576 30773 152 | 152 0 30774 31053 153 | 153 0 31054 31369 154 | 154 1 31370 31552 155 | 155 0 31553 31823 156 | 156 0 31824 32361 157 | 157 1 32362 32552 158 | 158 0 32553 32850 159 | 159 1 32851 33112 160 | 160 0 33113 33229 161 | 161 1 33230 33448 162 | 162 0 33449 33642 163 | 163 0 33643 33839 164 | 164 0 33840 34055 165 | 165 0 34056 34262 166 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5216129.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 1271 2 | 2 1 1272 1408 3 | 3 1 1409 1653 4 | 4 0 1654 1844 5 | 5 0 1845 1961 6 | 6 0 1962 2065 7 | 7 1 2066 2263 8 | 8 0 2264 2526 9 | 9 0 2527 2735 10 | 10 0 2736 2878 11 | 11 0 2879 3155 12 | 12 1 3156 3284 13 | 13 1 3285 3601 14 | 14 1 3602 3849 15 | 15 1 3850 4034 16 | 16 1 4035 4402 17 | 17 0 4403 4690 18 | 18 1 4691 4924 19 | 19 0 4925 5085 20 | 20 1 5086 5354 21 | 21 1 5355 5521 22 | 22 1 5522 5639 23 | 23 1 5640 5853 24 | 24 0 5854 6139 25 | 25 0 6140 6279 
26 | 26 0 6280 6494 27 | 27 0 6495 6632 28 | 28 0 6633 6792 29 | 29 0 6793 6963 30 | 30 0 6964 7104 31 | 31 1 7105 7299 32 | 32 0 7300 7520 33 | 33 0 7521 7861 34 | 34 1 7862 8147 35 | 35 1 8148 8287 36 | 36 0 8288 8522 37 | 37 1 8523 8718 38 | 38 1 8719 8804 39 | 39 1 8805 9059 40 | 40 1 9060 9460 41 | 41 0 9461 9752 42 | 42 1 9753 9940 43 | 43 0 9941 10029 44 | 44 1 10030 10282 45 | 45 1 10283 10421 46 | 46 0 10422 10656 47 | 47 1 10657 10915 48 | 48 1 10916 11105 49 | 49 1 11106 11280 50 | 50 1 11281 11616 51 | 51 0 11617 11787 52 | 52 0 11788 12148 53 | 53 1 12149 12418 54 | 54 0 12419 12546 55 | 55 0 12547 12843 56 | 56 0 12844 12933 57 | 57 0 12934 13038 58 | 58 0 13039 13311 59 | 59 0 13312 13436 60 | 60 0 13437 13771 61 | 61 1 13772 13951 62 | 62 0 13952 14083 63 | 63 0 14084 14276 64 | 64 0 14277 14743 65 | 65 0 14744 14873 66 | 66 0 14874 15119 67 | 67 0 15120 15319 68 | 68 1 15320 15511 69 | 69 0 15512 15652 70 | 70 0 15653 15781 71 | 71 0 15782 15901 72 | 72 0 15902 15986 73 | 73 0 15987 16112 74 | 74 0 16113 16395 75 | 75 0 16396 16559 76 | 76 0 16560 16740 77 | 77 0 16741 16904 78 | 78 0 16905 17099 79 | 79 0 17100 17616 80 | 80 0 17617 17738 81 | 81 0 17739 17809 82 | 82 0 17810 18020 83 | 83 0 18021 18246 84 | 84 0 18247 18415 85 | 85 0 18416 18612 86 | 86 0 18613 19042 87 | 87 0 19043 19213 88 | 88 0 19214 19439 89 | 89 0 19440 19582 90 | 90 0 19583 19951 91 | 91 0 19952 20193 92 | 92 1 20194 20328 93 | 93 1 20329 20436 94 | 94 1 20437 20558 95 | 95 0 20559 20795 96 | 96 1 20796 21018 97 | 97 0 21019 21150 98 | 98 0 21151 21256 99 | 99 1 21257 21579 100 | 100 1 21580 21691 101 | 101 1 21692 21915 102 | 102 0 21916 22210 103 | 103 0 22211 22518 104 | 104 0 22519 22851 105 | 105 0 22852 22949 106 | 106 0 22950 23142 107 | 107 0 23143 23263 108 | 108 0 23264 23550 109 | 109 0 23551 23680 110 | 110 0 23681 23820 111 | 111 0 23821 23976 112 | 112 0 23977 24141 113 | 113 0 24142 24296 114 | 114 0 24297 24453 115 | 115 0 24454 24572 116 | 116 0 24573 
24649 117 | 117 0 24650 24758 118 | 118 0 24759 24916 119 | 119 0 24917 24969 120 | 120 0 24970 25052 121 | 121 0 25053 25131 122 | 122 0 25132 25246 123 | 123 0 25247 25725 124 | 124 0 25726 25856 125 | 125 0 25857 26032 126 | 126 0 26033 26279 127 | 127 0 26280 26388 128 | 128 0 26389 26480 129 | 129 0 26481 26579 130 | 130 0 26580 26716 131 | 131 0 26717 26822 132 | 132 0 26823 26976 133 | 133 0 26977 27212 134 | 134 0 27213 27279 135 | 135 0 27280 27405 136 | 136 0 27406 27584 137 | 137 0 27585 27667 138 | 138 0 27668 27782 139 | 139 0 27783 27875 140 | 140 0 27876 27959 141 | 141 0 27960 28122 142 | 142 0 28123 28214 143 | 143 0 28215 28359 144 | 144 0 28360 28568 145 | 145 0 28569 28673 146 | 146 0 28674 28738 147 | 147 0 28739 28825 148 | 148 0 28826 28934 149 | 149 0 28935 29120 150 | 150 0 29121 29309 151 | 151 0 29310 29468 152 | 152 0 29469 29632 153 | 153 0 29633 29858 154 | 154 0 29859 30123 155 | 155 0 30124 30213 156 | 156 0 30214 30293 157 | 157 0 30294 30356 158 | 158 0 30357 30624 159 | 159 0 30625 30872 160 | 160 0 30873 30974 161 | 161 0 30975 31015 162 | 162 0 31016 31152 163 | 163 0 31153 31215 164 | 164 0 31216 31317 165 | 165 0 31318 31322 166 | 166 0 31323 31372 167 | 167 0 31373 31509 168 | -------------------------------------------------------------------------------- /code/source/dataHandling/sofc_exp_utils.py: -------------------------------------------------------------------------------- 1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020). 2 | Copyright (c) 2020 Robert Bosch GmbH 3 | @author: Annemarie Friedrich 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as published 7 | by the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from collections import defaultdict
import csv


def modify_cross_val_data_split(doc_info, num_folds, fold):
    """
    Re-assign the train/dev split of the corpus for one cross validation fold.

    All documents currently marked "train" or "dev" are sorted by document ID.
    Every num_folds-th document of that sorted list, starting at position
    fold - 1, is marked "dev"; all others are marked "train". Documents with any
    other data split value (e.g. "test") are left untouched.
    The dictionary is modified in place.

    :param doc_info: corpus metadata dictionary (as created by get_sofc_corpus_metadata)
    :param num_folds: total number of cross validation folds
    :param fold: the fold for which to create the train/dev split, folds are indexed by 1, 2, ...
    :return: the modified doc_info object (the same object that was passed in)
    :raises ValueError: if fold is not within 1..num_folds
    """
    # An out-of-range fold would not fail on its own: fold 0 turns into index -1
    # and silently selects the last document, so reject it explicitly.
    if not 1 <= fold <= num_folds:
        raise ValueError("fold must be between 1 and num_folds ({}), got {}".format(num_folds, fold))
    # sort training document IDs alphabetically (dev documents are part of the training pool)
    train_ids = sorted(docid for docid in doc_info
                       if doc_info[docid]["datasplit"] in ("train", "dev"))
    # set all documents to train
    for docid in train_ids:
        doc_info[docid]["datasplit"] = "train"
    # pick the fold's dev documents: function is called with 1 = first fold etc.
    for docid in train_ids[fold - 1::num_folds]:
        doc_info[docid]["datasplit"] = "dev"
    return doc_info


def get_sofc_corpus_metadata(meta_csv_file):
    """
    Read list of all documents with licensing information and predefined data splits.

    The file is read as tab-separated text with a header row that must contain
    the columns "name", "license" and "set". Blank rows are skipped.

    :param meta_csv_file: CSV file with data split information.
    :return: a dictionary mapping each document ID (column "name") to a dictionary
             with the keys "license" and "datasplit" (column "set")
    :raises ValueError: if the file is empty or a required column is missing
    """
    doc_info = defaultdict(dict)
    # newline='' lets the csv module do its own newline handling
    with open(meta_csv_file, encoding='utf-8', newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter='\t')
        header = next(csvreader, None)
        if header is None:
            raise ValueError("metadata file {} is empty".format(meta_csv_file))
        # resolve the column positions once instead of once per row
        try:
            name_col = header.index("name")
            license_col = header.index("license")
            split_col = header.index("set")
        except ValueError:
            raise ValueError("metadata file {} must have the columns 'name', 'license' and 'set', "
                             "found {}".format(meta_csv_file, header))
        for row in csvreader:
            if not row:
                # csv.reader yields an empty list for a blank line
                continue
            docid = row[name_col]
            doc_info[docid]["license"] = row[license_col]
            doc_info[docid]["datasplit"] = row[split_col]
    return doc_info


def get_data_split_docids(meta_csv_file, num_cross_val_folds=None, current_cross_val_fold=None):
    """
    Collect the document IDs of the train, dev and test portions of the corpus.

    :param meta_csv_file: CSV file with data split information
    :param num_cross_val_folds: if using cross validation, specify total number of splits
    :param current_cross_val_fold: the current fold (indexed by 1, 2, ...), only used
                                   if num_cross_val_folds is given
    :return: returns the train, dev and test ids (three lists) to use in this experiment.
    """
    # retrieve data split as defined in metadata
    doc_info = get_sofc_corpus_metadata(meta_csv_file)
    if num_cross_val_folds is not None:
        print("document info:", len(doc_info))
        modify_cross_val_data_split(doc_info, num_cross_val_folds, current_cross_val_fold)
    ids_by_split = {"train": [], "dev": [], "test": []}
    for docid, info in doc_info.items():
        # documents with any other data split value are ignored
        split_ids = ids_by_split.get(info["datasplit"])
        if split_ids is not None:
            split_ids.append(docid)
    return ids_by_split["train"], ids_by_split["dev"], ids_by_split["test"]
0 3343 3352 15 | 15 0 3353 3533 16 | 16 0 3534 3721 17 | 17 0 3722 3910 18 | 18 0 3911 4126 19 | 19 0 4127 4295 20 | 20 0 4296 4457 21 | 21 0 4458 4700 22 | 22 1 4701 4861 23 | 23 0 4862 5020 24 | 24 0 5021 5098 25 | 25 0 5099 5279 26 | 26 0 5280 5400 27 | 27 0 5401 5553 28 | 28 0 5554 5741 29 | 29 0 5742 5950 30 | 30 0 5951 6105 31 | 31 0 6106 6297 32 | 32 0 6298 6399 33 | 33 0 6400 6572 34 | 34 0 6573 6748 35 | 35 0 6749 7031 36 | 36 0 7032 7118 37 | 37 0 7119 7321 38 | 38 0 7322 7580 39 | 39 0 7581 7778 40 | 40 0 7779 7885 41 | 41 0 7886 8143 42 | 42 0 8144 8438 43 | 43 0 8439 8582 44 | 44 0 8583 8660 45 | 45 0 8661 8793 46 | 46 0 8794 9041 47 | 47 0 9042 9149 48 | 48 0 9150 9225 49 | 49 0 9226 9330 50 | 50 0 9331 9458 51 | 51 0 9459 9667 52 | 52 0 9668 9804 53 | 53 0 9805 10111 54 | 54 0 10112 10271 55 | 55 0 10272 10456 56 | 56 0 10457 10545 57 | 57 0 10546 10679 58 | 58 0 10680 10857 59 | 59 0 10858 11023 60 | 60 0 11024 11069 61 | 61 0 11070 11226 62 | 62 0 11227 11347 63 | 63 0 11348 11564 64 | 64 0 11565 11714 65 | 65 0 11715 11796 66 | 66 0 11797 11966 67 | 67 0 11967 12107 68 | 68 0 12108 12175 69 | 69 0 12176 12277 70 | 70 0 12278 12418 71 | 71 0 12419 12736 72 | 72 0 12737 12836 73 | 73 0 12837 13027 74 | 74 0 13028 13151 75 | 75 0 13152 13209 76 | 76 0 13210 13419 77 | 77 0 13420 13735 78 | 78 0 13736 13992 79 | 79 0 13993 14241 80 | 80 0 14242 14338 81 | 81 0 14339 14505 82 | 82 0 14506 14637 83 | 83 0 14638 14829 84 | 84 0 14830 14968 85 | 85 0 14969 15118 86 | 86 0 15119 15274 87 | 87 0 15275 15583 88 | 88 0 15584 15596 89 | 89 0 15597 15709 90 | 90 0 15710 15870 91 | 91 0 15871 15971 92 | 92 0 15972 16226 93 | 93 0 16227 16484 94 | 94 0 16485 16667 95 | 95 0 16668 16677 96 | 96 0 16678 16788 97 | 97 0 16789 16941 98 | 98 0 16942 17095 99 | 99 0 17096 17288 100 | 100 0 17289 17459 101 | 101 0 17460 17596 102 | 102 0 17597 17738 103 | 103 0 17739 17903 104 | 104 0 17904 18200 105 | 105 0 18201 18299 106 | 106 0 18300 18368 107 | 107 0 18369 18535 
108 | 108 0 18536 18617 109 | 109 0 18618 18702 110 | 110 0 18703 18792 111 | 111 0 18793 18928 112 | 112 0 18929 18975 113 | 113 0 18976 19114 114 | 114 0 19115 19123 115 | 115 0 19124 19297 116 | 116 0 19298 19427 117 | 117 0 19428 19900 118 | 118 0 19901 20060 119 | 119 0 20061 20160 120 | 120 0 20161 20411 121 | 121 0 20412 20662 122 | 122 0 20663 20753 123 | 123 0 20754 20864 124 | 124 0 20865 21043 125 | 125 1 21044 21209 126 | 126 0 21210 21564 127 | 127 0 21565 21703 128 | 128 0 21704 21805 129 | 129 0 21806 21913 130 | 130 0 21914 22049 131 | 131 0 22050 22201 132 | 132 0 22202 22398 133 | 133 0 22399 22581 134 | 134 0 22582 22832 135 | 135 0 22833 23031 136 | 136 0 23032 23250 137 | 137 0 23251 23306 138 | 138 0 23307 23494 139 | 139 0 23495 23608 140 | 140 0 23609 23679 141 | 141 0 23680 23777 142 | 142 0 23778 24162 143 | 143 0 24163 24438 144 | 144 0 24439 24704 145 | 145 0 24705 24871 146 | 146 0 24872 25005 147 | 147 0 25006 25114 148 | 148 0 25115 25214 149 | 149 0 25215 25429 150 | 150 0 25430 25551 151 | 151 0 25552 25734 152 | 152 0 25735 26026 153 | 153 0 26027 26036 154 | 154 0 26037 26134 155 | 155 0 26135 26237 156 | 156 0 26238 26488 157 | 157 0 26489 26571 158 | 158 0 26572 26761 159 | 159 0 26762 26867 160 | 160 0 26868 27042 161 | 161 0 27043 27217 162 | 162 1 27218 27411 163 | 163 0 27412 27512 164 | 164 0 27513 27610 165 | 165 0 27611 27799 166 | 166 0 27800 27901 167 | 167 0 27902 28104 168 | 168 0 28105 28333 169 | 169 0 28334 28448 170 | 170 0 28449 28751 171 | 171 0 28752 28970 172 | 172 0 28971 29116 173 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5457246.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 312 2 | 2 0 313 571 3 | 3 0 572 757 4 | 4 1 758 1001 5 | 5 0 1002 1105 6 | 6 1 1106 1288 7 | 7 1 1289 1449 8 | 8 1 1450 1618 9 | 9 1 1619 1804 10 | 10 0 1805 2051 11 | 11 1 2052 2202 12 | 12 1 2203 2608 13 
| 13 0 2609 2863 14 | 14 0 2864 3236 15 | 15 0 3237 3440 16 | 16 0 3441 3671 17 | 17 0 3672 3819 18 | 18 0 3820 4029 19 | 19 0 4030 4285 20 | 20 0 4286 4585 21 | 21 0 4586 4733 22 | 22 1 4734 5015 23 | 23 0 5016 5236 24 | 24 0 5237 5520 25 | 25 0 5521 5577 26 | 26 1 5578 5767 27 | 27 1 5768 5921 28 | 28 0 5922 6012 29 | 29 1 6013 6358 30 | 30 0 6359 6449 31 | 31 0 6450 6785 32 | 32 0 6786 7021 33 | 33 0 7022 7144 34 | 34 0 7145 7299 35 | 35 1 7300 7917 36 | 36 0 7918 7999 37 | 37 0 8000 8145 38 | 38 0 8146 8335 39 | 39 0 8336 8540 40 | 40 0 8541 8652 41 | 41 0 8653 8845 42 | 42 0 8846 8948 43 | 43 0 8949 9059 44 | 44 0 9060 9182 45 | 45 0 9183 9443 46 | 46 0 9444 9586 47 | 47 0 9587 9722 48 | 48 0 9723 9963 49 | 49 0 9964 10095 50 | 50 0 10096 10325 51 | 51 0 10326 10539 52 | 52 0 10540 10744 53 | 53 0 10745 10914 54 | 54 0 10915 11096 55 | 55 0 11097 11308 56 | 56 0 11309 11468 57 | 57 0 11469 11620 58 | 58 0 11621 11802 59 | 59 0 11803 11918 60 | 60 0 11919 12119 61 | 61 0 12120 12420 62 | 62 0 12421 12716 63 | 63 0 12717 12954 64 | 64 1 12955 13216 65 | 65 0 13217 13297 66 | 66 0 13298 13475 67 | 67 1 13476 13582 68 | 68 0 13583 13817 69 | 69 1 13818 13915 70 | 70 0 13916 14083 71 | 71 1 14084 14170 72 | 72 0 14171 14434 73 | 73 0 14435 14549 74 | 74 1 14550 14639 75 | 75 1 14640 14765 76 | 76 1 14766 14933 77 | 77 0 14934 15104 78 | 78 0 15105 15205 79 | 79 0 15206 15318 80 | 80 0 15319 15484 81 | 81 1 15485 15564 82 | 82 1 15565 15676 83 | 83 1 15677 15821 84 | 84 1 15822 15910 85 | 85 1 15911 16123 86 | 86 0 16124 16250 87 | 87 1 16251 16435 88 | 88 1 16436 16498 89 | 89 1 16499 16603 90 | 90 0 16604 16769 91 | 91 0 16770 16840 92 | 92 0 16841 16954 93 | 93 0 16955 17090 94 | 94 0 17091 17159 95 | 95 1 17160 17330 96 | 96 1 17331 17427 97 | 97 1 17428 17680 98 | 98 0 17681 17830 99 | 99 0 17831 17941 100 | 100 0 17942 18108 101 | 101 0 18109 18318 102 | 102 0 18319 18495 103 | 103 1 18496 18587 104 | 104 1 18588 18697 105 | 105 0 18698 18937 106 | 106 0 18938 
19097 107 | 107 0 19098 19261 108 | 108 0 19262 19395 109 | 109 0 19396 19548 110 | 110 1 19549 19742 111 | 111 1 19743 19828 112 | 112 0 19829 19924 113 | 113 0 19925 20167 114 | 114 0 20168 20414 115 | 115 0 20415 20573 116 | 116 1 20574 20692 117 | 117 0 20693 21044 118 | 118 0 21045 21562 119 | 119 0 21563 21945 120 | 120 0 21946 22102 121 | 121 0 22103 22302 122 | 122 0 22303 22462 123 | 123 0 22463 22546 124 | 124 0 22547 22734 125 | 125 0 22735 22818 126 | 126 0 22819 22862 127 | 127 0 22863 22985 128 | 128 0 22986 23130 129 | 129 0 23131 23259 130 | 130 0 23260 23297 131 | 131 0 23298 23365 132 | 132 0 23366 23495 133 | 133 0 23496 23665 134 | 134 0 23666 23823 135 | 135 0 23824 23930 136 | 136 0 23931 24039 137 | 137 0 24040 24261 138 | 138 0 24262 24370 139 | 139 0 24371 24622 140 | 140 0 24623 24833 141 | 141 0 24834 24924 142 | 142 0 24925 25150 143 | 143 0 25151 25349 144 | 144 0 25350 25507 145 | 145 0 25508 25759 146 | 146 0 25760 25890 147 | 147 0 25891 26072 148 | 148 0 26073 26117 149 | 149 0 26118 26189 150 | 150 0 26190 26283 151 | 151 0 26284 26560 152 | 152 0 26561 26659 153 | 153 0 26660 26864 154 | 154 0 26865 27047 155 | 155 0 27048 27171 156 | 156 0 27172 27255 157 | 157 0 27256 27421 158 | 158 0 27422 27588 159 | 159 0 27589 27704 160 | 160 0 27705 27923 161 | 161 0 27924 28082 162 | 162 0 28083 28214 163 | 163 0 28215 28351 164 | 164 0 28352 28495 165 | 165 0 28496 28604 166 | 166 0 28605 28707 167 | 167 0 28708 28846 168 | 168 0 28847 29051 169 | 169 1 29052 29259 170 | 170 0 29260 29444 171 | 171 0 29445 29597 172 | 172 0 29598 29732 173 | 173 0 29733 29844 174 | 174 0 29845 29920 175 | 175 0 29921 30038 176 | 176 0 30039 30187 177 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC6523084.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 MATERIAL 6 0 20 2 | SPAN 2 EXPERIMENT:current_exp 6 35 43 3 | SPAN 3 MATERIAL 6 
113 127 4 | SPAN 4 MATERIAL 6 129 132 5 | SPAN 5 MATERIAL 6 169 185 6 | SPAN 6 MATERIAL 6 187 190 7 | SPAN 7 MATERIAL 21 20 34 8 | SPAN 8 MATERIAL 21 76 92 9 | SPAN 9 MATERIAL 21 149 173 10 | SPAN 10 MATERIAL 21 175 179 11 | SPAN 11 MATERIAL 35 129 145 12 | SPAN 12 MATERIAL 35 147 150 13 | SPAN 13 MATERIAL 35 155 159 14 | SPAN 14 MATERIAL 45 95 110 15 | SPAN 15 MATERIAL 45 130 141 16 | SPAN 16 EXPERIMENT:general_info 45 154 158 17 | SPAN 17 DEVICE 45 187 191 18 | SPAN 18 MATERIAL 46 0 4 19 | SPAN 19 EXPERIMENT:general_info 46 33 37 20 | SPAN 20 DEVICE 46 54 58 21 | SPAN 21 MATERIAL 47 11 26 22 | SPAN 22 EXPERIMENT:general_info 47 41 46 23 | SPAN 23 VALUE 47 99 114 24 | SPAN 24 MATERIAL 57 42 56 25 | SPAN 25 MATERIAL 57 58 61 26 | SPAN 26 MATERIAL 58 65 81 27 | SPAN 27 MATERIAL 58 83 86 28 | SPAN 28 MATERIAL 58 89 105 29 | SPAN 29 MATERIAL 58 107 110 30 | SPAN 30 MATERIAL 58 113 129 31 | SPAN 31 MATERIAL 58 131 134 32 | SPAN 32 MATERIAL 58 141 165 33 | SPAN 33 MATERIAL 58 167 171 34 | SPAN 34 MATERIAL 63 105 117 35 | SPAN 35 MATERIAL 63 119 122 36 | SPAN 36 EXPERIMENT:current_exp 120 23 32 37 | SPAN 37 MATERIAL 120 36 40 38 | SPAN 38 MATERIAL 120 41 44 39 | SPAN 39 MATERIAL 120 45 49 40 | SPAN 40 DEVICE 120 62 67 41 | SPAN 41 EXPERIMENT:current_exp 163 15 19 42 | SPAN 42 VALUE 163 23 29 43 | SPAN 43 MATERIAL 163 33 36 44 | SPAN 44 VALUE 163 64 68 45 | SPAN 45 EXPERIMENT:current_exp 183 39 47 46 | SPAN 46 VALUE 183 51 57 47 | SPAN 47 DEVICE 183 99 104 48 | SPAN 48 DEVICE 184 45 50 49 | SPAN 49 EXPERIMENT:current_exp 184 51 55 50 | SPAN 50 VALUE 184 77 83 51 | SPAN 51 DEVICE 185 36 41 52 | SPAN 52 EXPERIMENT:current_exp 185 42 49 53 | SPAN 53 VALUE 185 89 100 54 | SPAN 54 MATERIAL 185 105 108 55 | SPAN 55 DEVICE 185 121 125 56 | SPAN 56 VALUE 185 142 153 57 | SPAN 57 MATERIAL 185 158 161 58 | SPAN 58 VALUE 185 208 219 59 | SPAN 59 EXPERIMENT:current_exp 186 57 66 60 | SPAN 60 VALUE 186 90 96 61 | SPAN 61 MATERIAL 186 98 101 62 | SPAN 62 VALUE 186 119 125 63 | SPAN 63 
MATERIAL 186 127 130 64 | SPAN 64 VALUE 186 180 186 65 | SPAN 65 EXPERIMENT:current_exp 191 35 44 66 | SPAN 66 VALUE 191 48 58 67 | SPAN 67 DEVICE 191 77 81 68 | SPAN 68 VALUE 191 83 93 69 | SPAN 69 MATERIAL 191 112 120 70 | SPAN 70 DEVICE 191 121 125 71 | SPAN 71 VALUE 191 131 141 72 | SPAN 72 MATERIAL 191 159 167 73 | SPAN 73 DEVICE 191 168 172 74 | EXPERIMENT 1 2 75 | cathode_material 1 76 | cathode_material 3 77 | cathode_material 5 78 | EXPERIMENT 2 16 79 | cathode_material 14 80 | cathode_material 15 81 | device 17 82 | EXPERIMENT 3 19 83 | cathode_material 18 84 | device 20 85 | EXPERIMENT 4 22 86 | cathode_material 21 87 | degradation_rate 23 88 | EXPERIMENT 5 36 89 | cathode_material 37 90 | electrolyte_material 38 91 | anode_material 39 92 | device 40 93 | EXPERIMENT 6 41 94 | working_temperature 42 95 | fuel_used 43 96 | time_of_operation 44 97 | EXPERIMENT 7 45 98 | working_temperature 46 99 | device 47 100 | EXPERIMENT 8 49 101 | device 48 102 | open_circuit_voltage 50 103 | EXPERIMENT 9 52 104 | device 51 105 | power_density 53 106 | cathode_material 54 107 | device 55 108 | power_density 56 109 | cathode_material 57 110 | power_density 58 111 | EXPERIMENT 10 59 112 | resistance 60 113 | cathode_material 61 114 | resistance 62 115 | cathode_material 63 116 | resistance 64 117 | EXPERIMENT 11 65 118 | resistance 66 119 | device 67 120 | resistance 68 121 | cathode_material 69 122 | device 70 123 | resistance 71 124 | cathode_material 72 125 | device 73 126 | LINK same_experiment 49 52 127 | LINK same_experiment 52 59 128 | LINK coreference 3 4 129 | LINK coreference 5 6 130 | LINK coreference 9 10 131 | LINK coreference 11 12 132 | LINK coreference 26 27 133 | LINK coreference 28 29 134 | LINK coreference 30 31 135 | LINK coreference 32 33 136 | LINK coreference 34 35 137 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5848893.csv: 
-------------------------------------------------------------------------------- 1 | 1 0 0 536 2 | 2 0 537 573 3 | 3 0 574 1393 4 | 4 1 1394 1571 5 | 5 1 1572 1827 6 | 6 0 1828 2012 7 | 7 0 2013 2246 8 | 8 0 2247 2413 9 | 9 0 2414 2538 10 | 10 0 2539 2784 11 | 11 0 2785 2982 12 | 12 0 2983 3068 13 | 13 0 3069 3180 14 | 14 1 3181 3349 15 | 15 0 3350 3454 16 | 16 0 3455 3622 17 | 17 0 3623 3767 18 | 18 0 3768 3854 19 | 19 0 3855 3973 20 | 20 0 3974 4227 21 | 21 0 4228 4496 22 | 22 0 4497 4627 23 | 23 0 4628 4959 24 | 24 0 4960 5111 25 | 25 1 5112 5387 26 | 26 0 5388 5502 27 | 27 0 5503 5702 28 | 28 0 5703 5753 29 | 29 0 5754 5998 30 | 30 0 5999 6146 31 | 31 1 6147 6364 32 | 32 0 6365 6516 33 | 33 1 6517 6805 34 | 34 0 6806 6945 35 | 35 0 6946 7155 36 | 36 0 7156 7312 37 | 37 1 7313 7523 38 | 38 1 7524 7743 39 | 39 0 7744 7911 40 | 40 1 7912 8076 41 | 41 1 8077 8349 42 | 42 1 8350 8540 43 | 43 0 8541 8705 44 | 44 0 8706 8723 45 | 45 0 8724 8897 46 | 46 0 8898 9001 47 | 47 0 9002 9131 48 | 48 0 9132 9254 49 | 49 0 9255 9413 50 | 50 0 9414 9529 51 | 51 0 9530 9626 52 | 52 0 9627 9908 53 | 53 0 9909 10109 54 | 54 0 10110 10223 55 | 55 0 10224 10284 56 | 56 0 10285 10393 57 | 57 0 10394 10539 58 | 58 0 10540 10653 59 | 59 0 10654 10702 60 | 60 0 10703 11135 61 | 61 0 11136 11235 62 | 62 0 11236 11326 63 | 63 0 11327 11441 64 | 64 0 11442 11651 65 | 65 0 11652 11797 66 | 66 0 11798 11978 67 | 67 0 11979 12045 68 | 68 0 12046 12160 69 | 69 0 12161 12248 70 | 70 0 12249 12531 71 | 71 0 12532 12608 72 | 72 0 12609 12621 73 | 73 0 12622 12849 74 | 74 0 12850 12958 75 | 75 0 12959 13076 76 | 76 0 13077 13167 77 | 77 0 13168 13251 78 | 78 0 13252 13320 79 | 79 1 13321 13547 80 | 80 0 13548 13678 81 | 81 0 13679 13761 82 | 82 0 13762 13916 83 | 83 0 13917 14064 84 | 84 0 14065 14244 85 | 85 0 14245 14728 86 | 86 0 14729 14903 87 | 87 0 14904 15061 88 | 88 1 15062 15190 89 | 89 1 15191 15305 90 | 90 0 15306 15487 91 | 91 1 15488 15578 92 | 92 1 15579 15681 93 | 93 0 15682 15835 94 
| 94 0 15836 16026 95 | 95 0 16027 16166 96 | 96 0 16167 16286 97 | 97 0 16287 16454 98 | 98 0 16455 16653 99 | 99 0 16654 16787 100 | 100 0 16788 16884 101 | 101 0 16885 16960 102 | 102 0 16961 17174 103 | 103 0 17175 17367 104 | 104 0 17368 17517 105 | 105 0 17518 17656 106 | 106 0 17657 17849 107 | 107 0 17850 18060 108 | 108 0 18061 18206 109 | 109 0 18207 18272 110 | 110 0 18273 18426 111 | 111 0 18427 18524 112 | 112 0 18525 18755 113 | 113 0 18756 18923 114 | 114 0 18924 19144 115 | 115 0 19145 19305 116 | 116 0 19306 19492 117 | 117 0 19493 19508 118 | 118 0 19509 19816 119 | 119 0 19817 20012 120 | 120 0 20013 20281 121 | 121 0 20282 20445 122 | 122 0 20446 20591 123 | 123 0 20592 20735 124 | 124 0 20736 20937 125 | 125 0 20938 21054 126 | 126 0 21055 21102 127 | 127 0 21103 21274 128 | 128 0 21275 21455 129 | 129 0 21456 21742 130 | 130 0 21743 21855 131 | 131 0 21856 21995 132 | 132 0 21996 22223 133 | 133 0 22224 22397 134 | 134 0 22398 22646 135 | 135 0 22647 22917 136 | 136 0 22918 23058 137 | 137 0 23059 23122 138 | 138 0 23123 23336 139 | 139 0 23337 23551 140 | 140 0 23552 23644 141 | 141 0 23645 23801 142 | 142 0 23802 24042 143 | 143 0 24043 24237 144 | 144 0 24238 24409 145 | 145 0 24410 24490 146 | 146 0 24491 24617 147 | 147 0 24618 24737 148 | 148 0 24738 24840 149 | 149 0 24841 24975 150 | 150 0 24976 25078 151 | 151 0 25079 25254 152 | 152 0 25255 25451 153 | 153 0 25452 25582 154 | 154 0 25583 25724 155 | 155 0 25725 26028 156 | 156 0 26029 26273 157 | 157 0 26274 26470 158 | 158 0 26471 26620 159 | 159 0 26621 26777 160 | 160 0 26778 27005 161 | 161 0 27006 27182 162 | 162 0 27183 27332 163 | 163 0 27333 27562 164 | 164 0 27563 27707 165 | 165 0 27708 27838 166 | 166 0 27839 27972 167 | 167 0 27973 28117 168 | 168 0 28118 28313 169 | 169 1 28314 28464 170 | 170 1 28465 28678 171 | 171 0 28679 28852 172 | 172 0 28853 29023 173 | 173 0 29024 29253 174 | 174 0 29254 29391 175 | 175 0 29392 29481 176 | 176 0 29482 29572 177 | 177 0 29573 
29794 178 | 178 0 29795 30051 179 | -------------------------------------------------------------------------------- /code/source/main_preprocess.py: -------------------------------------------------------------------------------- 1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020). 2 | Copyright (c) 2020 Robert Bosch GmbH 3 | @author: Heike Adel 4 | @author: Annemarie Friedrich 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU Affero General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU Affero General Public License for more details. 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see <https://www.gnu.org/licenses/>. 
17 | """ 18 | 19 | from argparse import ArgumentParser 20 | 21 | from preprocess import preprocess_embeddings 22 | from dataHandling.sofc_exp_utils import get_data_split_docids 23 | from dataHandling.dataLoader import get_vocab 24 | 25 | parser = ArgumentParser() 26 | # data location 27 | parser.add_argument('-corpus_dir', default='../sofc-exp-corpus', type=str) 28 | parser.add_argument('-corpus_meta_data_file', default='../sofc-exp-corpus/SOFC-Exp-Metadata.csv', type=str) 29 | 30 | # pretrained embedding location 31 | parser.add_argument('-embedding_file_word2vec', default='../data/embeddings/GoogleNews-vectors-negative300.bin') 32 | parser.add_argument('-embedding_file_mat2vec', default='../data/embeddings/pretrained_embeddings') 33 | parser.add_argument('-embedding_file_bpe', default='../data/embeddings/en.wiki.bpe.vs200000.d300.w2v.bin') 34 | parser.add_argument('-embedding_model_bpe', default='../data/embeddings/en.wiki.bpe.vs200000.model') 35 | 36 | # output location 37 | parser.add_argument('-output_word2index_file_mat2vec', default='../data/embeddings/word2index_mat2vec.pickle', type=str) 38 | parser.add_argument('-output_word2index_file_word2vec', default='../data/embeddings/word2index_word2vec.pickle', type=str) 39 | parser.add_argument('-output_word2index_file_bpe', default='../data/embeddings/word2index_bpe.pickle', type=str) 40 | parser.add_argument('-output_embedding_file_word2vec', default='../data/embeddings/word2vec.npy', type=str) 41 | parser.add_argument('-output_embedding_file_mat2vec', default='../data/embeddings/mat2vec.npy', type=str) 42 | parser.add_argument('-output_embedding_file_bpe', default='../data/embeddings/bpe.npy', type=str) 43 | 44 | args = parser.parse_args() 45 | 46 | print(args) 47 | # convert args to a dictionary 48 | options = vars(args) 49 | 50 | mat2vec_file = options['embedding_file_mat2vec'] 51 | word2vec_file = options['embedding_file_word2vec'] 52 | bpe_file = options['embedding_file_bpe'] 53 | 54 | train_ids, dev_ids, 
test_ids = get_data_split_docids(options['corpus_meta_data_file']) 55 | 56 | data_vocabulary = get_vocab(options['corpus_dir'], train_ids + dev_ids + test_ids) 57 | 58 | mat2vec_word2index, mat2vec_matrix = preprocess_embeddings.get_embedding_weight_matrix(mat2vec_file, "mat2vec", data_vocabulary) 59 | word2vec_word2index, word2vec_matrix = preprocess_embeddings.get_embedding_weight_matrix(word2vec_file, "word2vec", data_vocabulary) 60 | bpe_word2index, bpe_matrix = preprocess_embeddings.get_bpe_embedding_weight_matrix(bpe_file, options["embedding_model_bpe"], data_vocabulary) 61 | 62 | preprocess_embeddings.save_word2index(mat2vec_word2index, options['output_word2index_file_mat2vec']) 63 | preprocess_embeddings.save_word2index(word2vec_word2index, options['output_word2index_file_word2vec']) 64 | preprocess_embeddings.save_word2index(bpe_word2index, options['output_word2index_file_bpe']) 65 | 66 | preprocess_embeddings.save_weight_matrix(mat2vec_matrix, options['output_embedding_file_mat2vec']) 67 | preprocess_embeddings.save_weight_matrix(word2vec_matrix, options['output_embedding_file_word2vec']) 68 | preprocess_embeddings.save_weight_matrix(bpe_matrix, options['output_embedding_file_bpe']) 69 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC5700654.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 DEVICE 9 83 88 2 | SPAN 2 MATERIAL 10 51 72 3 | SPAN 3 MATERIAL 10 74 78 4 | SPAN 4 MATERIAL 10 163 177 5 | SPAN 5 MATERIAL 10 179 182 6 | SPAN 6 MATERIAL 11 161 192 7 | SPAN 7 MATERIAL 11 194 199 8 | SPAN 8 DEVICE 88 2 6 9 | SPAN 9 MATERIAL 88 20 25 10 | SPAN 10 EXPERIMENT:current_exp 88 32 41 11 | SPAN 11 VALUE 88 72 83 12 | SPAN 12 VALUE 88 87 93 13 | SPAN 13 EXPERIMENT:current_exp 88 94 103 14 | SPAN 14 MATERIAL 88 107 109 15 | SPAN 15 EXPERIMENT:current_exp 88 117 120 16 | SPAN 16 MATERIAL 88 159 163 17 | SPAN 17 DEVICE 90 2 6 18 | SPAN 18 
EXPERIMENT:current_exp 90 7 17 19 | SPAN 19 MATERIAL 90 29 46 20 | SPAN 20 MATERIAL 90 47 50 21 | SPAN 21 MATERIAL 90 64 67 22 | SPAN 22 MATERIAL 90 84 109 23 | SPAN 23 MATERIAL 90 112 115 24 | SPAN 24 VALUE 90 185 188 25 | SPAN 25 VALUE 90 190 193 26 | SPAN 26 VALUE 90 199 204 27 | SPAN 27 EXPERIMENT:current_exp 90 206 215 28 | SPAN 28 VALUE 90 224 227 29 | SPAN 29 VALUE 90 229 232 30 | SPAN 30 VALUE 90 234 237 31 | SPAN 31 VALUE 90 239 242 32 | SPAN 32 VALUE 90 248 259 33 | SPAN 33 EXPERIMENT:current_exp 90 260 269 34 | SPAN 34 MATERIAL 90 273 275 35 | SPAN 35 VALUE 90 284 287 36 | SPAN 36 VALUE 90 289 292 37 | SPAN 37 VALUE 90 294 297 38 | SPAN 38 VALUE 90 299 302 39 | SPAN 39 VALUE 90 308 314 40 | SPAN 40 DEVICE 94 38 43 41 | SPAN 41 MATERIAL 94 57 62 42 | SPAN 42 MATERIAL 94 68 86 43 | SPAN 43 EXPERIMENT:current_exp 94 94 98 44 | SPAN 44 VALUE 94 99 102 45 | SPAN 45 VALUE 94 107 118 46 | SPAN 46 EXPERIMENT:current_exp 94 119 128 47 | SPAN 47 MATERIAL 94 132 134 48 | SPAN 48 EXPERIMENT:current_exp 96 35 38 49 | SPAN 49 MATERIAL 96 39 56 50 | SPAN 50 DEVICE 96 74 79 51 | SPAN 51 MATERIAL 96 97 118 52 | SPAN 52 MATERIAL 96 120 138 53 | SPAN 53 MATERIAL 96 144 163 54 | SPAN 54 MATERIAL 96 164 167 55 | SPAN 55 EXPERIMENT:current_exp 96 175 179 56 | SPAN 56 VALUE 96 180 183 57 | SPAN 57 VALUE 96 185 188 58 | SPAN 58 VALUE 96 194 205 59 | SPAN 59 DEVICE 103 42 46 60 | SPAN 60 MATERIAL 103 64 81 61 | SPAN 61 MATERIAL 103 82 85 62 | SPAN 62 EXPERIMENT:current_exp 103 92 100 63 | SPAN 63 VALUE 103 120 125 64 | SPAN 64 VALUE 103 130 132 65 | SPAN 65 VALUE 103 137 141 66 | SPAN 66 MATERIAL 103 145 167 67 | SPAN 67 DEVICE 108 30 34 68 | SPAN 68 EXPERIMENT:current_exp 108 45 55 69 | SPAN 69 VALUE 108 59 65 70 | SPAN 70 VALUE 108 70 81 71 | EXPERIMENT 1 10 72 | device 8 73 | anode_material 9 74 | power_density 11 75 | working_temperature 12 76 | EXPERIMENT 2 13 77 | fuel_used 14 78 | EXPERIMENT 3 15 79 | anode_material 16 80 | EXPERIMENT 4 18 81 | device 17 82 | 
anode_material 19 83 | anode_material 20 84 | electrolyte_material 21 85 | cathode_material 22 86 | cathode_material 23 87 | EXPERIMENT 5 27 88 | power_density 28 89 | power_density 29 90 | power_density 30 91 | power_density 31 92 | power_density 32 93 | EXPERIMENT 6 33 94 | fuel_used 34 95 | working_temperature 35 96 | working_temperature 36 97 | working_temperature 37 98 | working_temperature 38 99 | working_temperature 39 100 | EXPERIMENT 7 43 101 | device 40 102 | anode_material 41 103 | anode_material 42 104 | power_density 44 105 | power_density 45 106 | EXPERIMENT 8 46 107 | fuel_used 47 108 | EXPERIMENT 9 48 109 | fuel_used 49 110 | EXPERIMENT 10 55 111 | device 50 112 | anode_material 51 113 | anode_material 52 114 | anode_material 53 115 | anode_material 54 116 | power_density 56 117 | power_density 57 118 | power_density 58 119 | EXPERIMENT 11 62 120 | device 59 121 | anode_material 60 122 | anode_material 61 123 | voltage 63 124 | time_of_operation 64 125 | time_of_operation 65 126 | fuel_used 66 127 | EXPERIMENT 12 68 128 | device 67 129 | voltage 69 130 | power_density 70 131 | LINK experiment_variation 15 10 132 | LINK same_experiment 10 13 133 | LINK thickness 20 24 134 | LINK thickness 21 25 135 | LINK thickness 22 26 136 | LINK same_experiment 18 27 137 | LINK same_experiment 27 33 138 | LINK same_experiment 43 46 139 | LINK same_experiment 48 55 140 | LINK coreference 2 3 141 | LINK coreference 4 5 142 | LINK coreference 6 7 143 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6461657.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 1340 2 | 2 1 1341 1503 3 | 3 1 1504 1750 4 | 4 1 1751 1955 5 | 5 1 1956 2060 6 | 6 0 2061 2251 7 | 7 1 2252 2377 8 | 8 0 2378 2482 9 | 9 0 2483 2567 10 | 10 0 2568 2954 11 | 11 0 2955 3192 12 | 12 1 3193 3565 13 | 13 0 3566 3934 14 | 14 1 3935 4277 15 | 15 0 4278 4590 16 | 16 0 4591 4731 17 | 17 
1 4732 5083 18 | 18 0 5084 5368 19 | 19 1 5369 5702 20 | 20 0 5703 5921 21 | 21 0 5922 6098 22 | 22 0 6099 6268 23 | 23 0 6269 6409 24 | 24 0 6410 6659 25 | 25 1 6660 6857 26 | 26 0 6858 7076 27 | 27 0 7077 7296 28 | 28 0 7297 7515 29 | 29 0 7516 7668 30 | 30 1 7669 7831 31 | 31 1 7832 8078 32 | 32 1 8079 8378 33 | 33 0 8379 8549 34 | 34 0 8550 8657 35 | 35 0 8658 8821 36 | 36 0 8822 9055 37 | 37 0 9056 9248 38 | 38 0 9249 9416 39 | 39 0 9417 9563 40 | 40 0 9564 9688 41 | 41 0 9689 9864 42 | 42 0 9865 10244 43 | 43 0 10245 10356 44 | 44 0 10357 10541 45 | 45 0 10542 10656 46 | 46 0 10657 11140 47 | 47 0 11141 11260 48 | 48 0 11261 11493 49 | 49 0 11494 11593 50 | 50 0 11594 11699 51 | 51 0 11700 11796 52 | 52 0 11797 12058 53 | 53 0 12059 12232 54 | 54 0 12233 12475 55 | 55 0 12476 12603 56 | 56 0 12604 12695 57 | 57 1 12696 12853 58 | 58 1 12854 13061 59 | 59 0 13062 13759 60 | 60 1 13760 13885 61 | 61 1 13886 14081 62 | 62 1 14082 14333 63 | 63 1 14334 14537 64 | 64 1 14538 14871 65 | 65 0 14872 15200 66 | 66 0 15201 15384 67 | 67 0 15385 15454 68 | 68 0 15455 15669 69 | 69 0 15670 15834 70 | 70 1 15835 15957 71 | 71 1 15958 16084 72 | 72 0 16085 16258 73 | 73 1 16259 16399 74 | 74 1 16400 16559 75 | 75 0 16560 16768 76 | 76 1 16769 16905 77 | 77 0 16906 17068 78 | 78 1 17069 17253 79 | 79 1 17254 17670 80 | 80 0 17671 17833 81 | 81 0 17834 18070 82 | 82 0 18071 18253 83 | 83 0 18254 18463 84 | 84 0 18464 18735 85 | 85 1 18736 18973 86 | 86 0 18974 19137 87 | 87 0 19138 19245 88 | 88 0 19246 19425 89 | 89 0 19426 19589 90 | 90 0 19590 19662 91 | 91 0 19663 19860 92 | 92 0 19861 20076 93 | 93 1 20077 20216 94 | 94 1 20217 20329 95 | 95 1 20330 20415 96 | 96 1 20416 20570 97 | 97 0 20571 20701 98 | 98 0 20702 20897 99 | 99 1 20898 21322 100 | 100 0 21323 21447 101 | 101 0 21448 21756 102 | 102 0 21757 21896 103 | 103 0 21897 22122 104 | 104 1 22123 22316 105 | 105 0 22317 22442 106 | 106 0 22443 22606 107 | 107 0 22607 22928 108 | 108 0 22929 23075 109 | 109 0 
23076 23158 110 | 110 0 23159 23268 111 | 111 0 23269 23374 112 | 112 0 23375 23722 113 | 113 0 23723 23826 114 | 114 0 23827 24060 115 | 115 0 24061 24245 116 | 116 0 24246 24330 117 | 117 0 24331 24484 118 | 118 0 24485 24609 119 | 119 0 24610 24715 120 | 120 0 24716 24951 121 | 121 0 24952 25139 122 | 122 0 25140 25645 123 | 123 0 25646 25880 124 | 124 0 25881 26125 125 | 125 0 26126 26344 126 | 126 0 26345 26501 127 | 127 0 26502 26649 128 | 128 0 26650 26848 129 | 129 0 26849 27147 130 | 130 0 27148 27280 131 | 131 0 27281 27392 132 | 132 0 27393 27524 133 | 133 0 27525 27713 134 | 134 0 27714 27826 135 | 135 1 27827 27997 136 | 136 1 27998 28256 137 | 137 0 28257 28447 138 | 138 1 28448 28865 139 | 139 0 28866 29098 140 | 140 0 29099 29409 141 | 141 0 29410 29542 142 | 142 0 29543 29667 143 | 143 0 29668 29854 144 | 144 0 29855 29994 145 | 145 0 29995 30253 146 | 146 0 30254 30558 147 | 147 0 30559 30815 148 | 148 0 30816 31067 149 | 149 1 31068 31229 150 | 150 1 31230 31527 151 | 151 0 31528 31806 152 | 152 0 31807 31942 153 | 153 0 31943 32110 154 | 154 0 32111 32265 155 | 155 0 32266 32370 156 | 156 0 32371 32659 157 | 157 0 32660 32890 158 | 158 0 32891 33022 159 | 159 0 33023 33165 160 | 160 0 33166 33324 161 | 161 0 33325 33455 162 | 162 0 33456 33646 163 | 163 0 33647 33769 164 | 164 0 33770 34151 165 | 165 0 34152 34294 166 | 166 0 34295 34479 167 | 167 0 34480 34844 168 | 168 0 34845 34965 169 | 169 0 34966 35193 170 | 170 0 35194 35298 171 | 171 0 35299 35475 172 | 172 0 35476 35649 173 | 173 0 35650 35837 174 | 174 0 35838 36063 175 | 175 0 36064 36169 176 | 176 0 36170 36411 177 | 177 0 36412 36580 178 | 178 0 36581 36754 179 | 179 0 36755 36963 180 | 180 0 36964 37176 181 | 181 0 37177 37313 182 | 182 0 37314 37419 183 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6370853.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 
1458 2 | 2 0 1459 1570 3 | 3 0 1571 1782 4 | 4 0 1783 2078 5 | 5 0 2079 2260 6 | 6 0 2261 2378 7 | 7 0 2379 2787 8 | 8 0 2788 3183 9 | 9 0 3184 3259 10 | 10 0 3260 3399 11 | 11 0 3400 3509 12 | 12 0 3510 3764 13 | 13 0 3765 3942 14 | 14 0 3943 4108 15 | 15 0 4109 4357 16 | 16 0 4358 4481 17 | 17 0 4482 4724 18 | 18 0 4725 4911 19 | 19 0 4912 6806 20 | 20 0 6807 6810 21 | 21 0 6811 6879 22 | 22 0 6880 7078 23 | 23 0 7079 7192 24 | 24 0 7193 7426 25 | 25 0 7427 7617 26 | 26 0 7618 7843 27 | 27 0 7844 8027 28 | 28 0 8028 8274 29 | 29 0 8275 8531 30 | 30 0 8532 8735 31 | 31 0 8736 8840 32 | 32 0 8841 9068 33 | 33 0 9069 9131 34 | 34 0 9132 10162 35 | 35 0 10163 10294 36 | 36 0 10295 10515 37 | 37 0 10516 10609 38 | 38 0 10610 10853 39 | 39 0 10854 11047 40 | 40 0 11048 11158 41 | 41 0 11159 11263 42 | 42 0 11264 11358 43 | 43 0 11359 11532 44 | 44 0 11533 12074 45 | 45 0 12075 12306 46 | 46 0 12307 13322 47 | 47 0 13323 13384 48 | 48 0 13385 14362 49 | 49 0 14363 16246 50 | 50 0 16247 16390 51 | 51 0 16391 16574 52 | 52 0 16575 16686 53 | 53 0 16687 16753 54 | 54 0 16754 16908 55 | 55 0 16909 17001 56 | 56 0 17002 17131 57 | 57 0 17132 17278 58 | 58 0 17279 17379 59 | 59 0 17380 17572 60 | 60 0 17573 17711 61 | 61 0 17712 17937 62 | 62 0 17938 18308 63 | 63 0 18309 18368 64 | 64 0 18369 18457 65 | 65 0 18458 18596 66 | 66 0 18597 18768 67 | 67 0 18769 18985 68 | 68 0 18986 19164 69 | 69 0 19165 19452 70 | 70 0 19453 19623 71 | 71 0 19624 19772 72 | 72 0 19773 19907 73 | 73 0 19908 20285 74 | 74 0 20286 21011 75 | 75 0 21012 21192 76 | 76 0 21193 21486 77 | 77 0 21487 21668 78 | 78 0 21669 21798 79 | 79 0 21799 21960 80 | 80 0 21961 22146 81 | 81 0 22147 22325 82 | 82 0 22326 22504 83 | 83 0 22505 23211 84 | 84 0 23212 23318 85 | 85 0 23319 23545 86 | 86 0 23546 23646 87 | 87 0 23647 23861 88 | 88 0 23862 24001 89 | 89 0 24002 24150 90 | 90 0 24151 24388 91 | 91 0 24389 24510 92 | 92 0 24511 24713 93 | 93 0 24714 24804 94 | 94 0 24805 25083 95 | 95 0 25084 25213 96 | 96 
0 25214 25386 97 | 97 0 25387 25606 98 | 98 0 25607 26585 99 | 99 0 26586 26756 100 | 100 0 26757 26905 101 | 101 0 26906 27120 102 | 102 0 27121 27191 103 | 103 0 27192 27280 104 | 104 1 27281 27522 105 | 105 1 27523 27684 106 | 106 1 27685 27859 107 | 107 0 27860 28082 108 | 108 0 28083 28391 109 | 109 0 28392 29008 110 | 110 0 29009 29198 111 | 111 1 29199 29530 112 | 112 0 29531 29610 113 | 113 0 29611 29812 114 | 114 0 29813 29938 115 | 115 0 29939 30098 116 | 116 1 30099 30350 117 | 117 0 30351 30528 118 | 118 0 30529 30749 119 | 119 0 30750 30933 120 | 120 0 30934 31127 121 | 121 0 31128 31312 122 | 122 0 31313 31467 123 | 123 0 31468 31674 124 | 124 0 31675 31840 125 | 125 0 31841 33050 126 | 126 0 33051 33099 127 | 127 0 33100 33303 128 | 128 0 33304 33481 129 | 129 0 33482 33821 130 | 130 1 33822 34055 131 | 131 0 34056 34192 132 | 132 0 34193 34493 133 | 133 0 34494 34663 134 | 134 0 34664 34906 135 | 135 0 34907 35106 136 | 136 0 35107 35182 137 | 137 0 35183 35280 138 | 138 0 35281 35446 139 | 139 0 35447 35500 140 | 140 0 35501 35672 141 | 141 0 35673 35751 142 | 142 0 35752 35857 143 | 143 0 35858 36073 144 | 144 0 36074 36284 145 | 145 0 36285 36352 146 | 146 0 36353 36432 147 | 147 0 36433 36633 148 | 148 0 36634 36716 149 | 149 0 36717 36785 150 | 150 0 36786 36865 151 | 151 0 36866 37080 152 | 152 0 37081 37244 153 | 153 0 37245 37325 154 | 154 0 37326 37499 155 | 155 0 37500 37623 156 | 156 0 37624 37740 157 | 157 0 37741 37827 158 | 158 0 37828 37957 159 | 159 0 37958 38120 160 | 160 0 38121 38253 161 | 161 0 38254 38392 162 | 162 0 38393 38507 163 | 163 0 38508 38640 164 | 164 0 38641 38702 165 | 165 0 38703 38806 166 | 166 0 38807 38891 167 | 167 0 38892 39024 168 | 168 0 39025 39210 169 | 169 0 39211 39355 170 | 170 0 39356 39475 171 | 171 0 39476 39597 172 | 172 0 39598 39770 173 | 173 0 39771 39838 174 | 174 0 39839 40056 175 | 175 0 40057 40228 176 | 176 0 40229 40371 177 | 177 0 40372 40495 178 | 178 0 40496 40672 179 | 179 0 40673 42251 
180 | 180 0 42252 42288 181 | 181 0 42289 42425 182 | 182 0 42426 42486 183 | 183 0 42487 42592 184 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6247067.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 1040 2 | 2 0 1041 1148 3 | 3 0 1149 1339 4 | 4 0 1340 1635 5 | 5 1 1636 1749 6 | 6 1 1750 1823 7 | 7 1 1824 2072 8 | 8 0 2073 2338 9 | 9 0 2339 2751 10 | 10 0 2752 2759 11 | 11 0 2760 2769 12 | 12 0 2770 2780 13 | 13 0 2781 2790 14 | 14 0 2791 2803 15 | 15 0 2804 2806 16 | 16 0 2807 2820 17 | 17 0 2821 2828 18 | 18 0 2829 2840 19 | 19 0 2841 3049 20 | 20 0 3050 3298 21 | 21 0 3299 3525 22 | 22 0 3526 3720 23 | 23 0 3721 3989 24 | 24 0 3990 4142 25 | 25 0 4143 4788 26 | 26 0 4789 5050 27 | 27 0 5051 5230 28 | 28 1 5231 6310 29 | 29 1 6311 7074 30 | 30 0 7075 7293 31 | 31 0 7294 7423 32 | 32 0 7424 7817 33 | 33 0 7818 7943 34 | 34 0 7944 8140 35 | 35 0 8141 8265 36 | 36 0 8266 8369 37 | 37 0 8370 8484 38 | 38 0 8485 8817 39 | 39 1 8818 9258 40 | 40 0 9259 9649 41 | 41 0 9650 9801 42 | 42 0 9802 9977 43 | 43 0 9978 10088 44 | 44 0 10089 10276 45 | 45 0 10277 10424 46 | 46 0 10425 10831 47 | 47 1 10832 11149 48 | 48 1 11150 11563 49 | 49 0 11564 11656 50 | 50 1 11657 11859 51 | 51 1 11860 12001 52 | 52 0 12002 12162 53 | 53 0 12163 12468 54 | 54 0 12469 12624 55 | 55 0 12625 12694 56 | 56 0 12695 12852 57 | 57 0 12853 12969 58 | 58 0 12970 13076 59 | 59 0 13077 13255 60 | 60 0 13256 13354 61 | 61 0 13355 13513 62 | 62 0 13514 13832 63 | 63 0 13833 14063 64 | 64 0 14064 14200 65 | 65 0 14201 14381 66 | 66 0 14382 14510 67 | 67 0 14511 14656 68 | 68 0 14657 14783 69 | 69 0 14784 14925 70 | 70 0 14926 15119 71 | 71 0 15120 15266 72 | 72 0 15267 15472 73 | 73 0 15473 15601 74 | 74 0 15602 15776 75 | 75 0 15777 15863 76 | 76 0 15864 15985 77 | 77 0 15986 16094 78 | 78 0 16095 16259 79 | 79 0 16260 16713 80 | 80 0 16714 16919 81 | 81 0 16920 17147 82 | 
82 0 17148 17329 83 | 83 0 17330 17462 84 | 84 0 17463 17555 85 | 85 0 17556 17737 86 | 86 0 17738 17858 87 | 87 0 17859 18006 88 | 88 0 18007 18211 89 | 89 0 18212 18316 90 | 90 0 18317 18498 91 | 91 0 18499 18792 92 | 92 0 18793 18953 93 | 93 0 18954 20120 94 | 94 0 20121 20247 95 | 95 0 20248 20389 96 | 96 0 20390 20692 97 | 97 0 20693 20807 98 | 98 0 20808 20915 99 | 99 0 20916 21060 100 | 100 0 21061 21211 101 | 101 0 21212 21306 102 | 102 0 21307 21829 103 | 103 0 21830 21988 104 | 104 0 21989 22283 105 | 105 0 22284 22433 106 | 106 0 22434 22642 107 | 107 0 22643 22799 108 | 108 0 22800 22886 109 | 109 0 22887 23460 110 | 110 0 23461 23556 111 | 111 0 23557 23717 112 | 112 0 23718 23884 113 | 113 0 23885 23998 114 | 114 0 23999 24233 115 | 115 0 24234 24341 116 | 116 0 24342 24528 117 | 117 0 24529 24641 118 | 118 0 24642 24762 119 | 119 0 24763 24917 120 | 120 0 24918 25008 121 | 121 0 25009 25118 122 | 122 0 25119 25312 123 | 123 0 25313 25434 124 | 124 0 25435 25541 125 | 125 0 25542 25692 126 | 126 0 25693 25773 127 | 127 0 25774 25938 128 | 128 0 25939 26129 129 | 129 0 26130 26199 130 | 130 0 26200 26259 131 | 131 0 26260 26413 132 | 132 0 26414 26563 133 | 133 0 26564 27016 134 | 134 0 27017 27122 135 | 135 0 27123 27287 136 | 136 0 27288 27411 137 | 137 0 27412 27624 138 | 138 0 27625 27728 139 | 139 0 27729 27828 140 | 140 0 27829 28138 141 | 141 0 28139 28221 142 | 142 0 28222 28375 143 | 143 0 28376 28622 144 | 144 0 28623 28688 145 | 145 0 28689 28832 146 | 146 0 28833 29029 147 | 147 0 29030 29133 148 | 148 0 29134 29269 149 | 149 0 29270 29524 150 | 150 0 29525 29741 151 | 151 0 29742 30340 152 | 152 0 30341 30592 153 | 153 0 30593 30843 154 | 154 0 30844 30983 155 | 155 0 30984 31235 156 | 156 0 31236 31314 157 | 157 0 31315 31463 158 | 158 0 31464 31540 159 | 159 0 31541 31642 160 | 160 0 31643 31755 161 | 161 0 31756 32227 162 | 162 0 32228 32419 163 | 163 0 32420 32536 164 | 164 0 32537 32625 165 | 165 0 32626 32877 166 | 166 0 32878 33029 
167 | 167 0 33030 33148 168 | 168 0 33149 33222 169 | 169 0 33223 33557 170 | 170 0 33558 33591 171 | 171 0 33592 33811 172 | 172 0 33812 33894 173 | 173 0 33895 33983 174 | 174 0 33984 34220 175 | 175 0 34221 34342 176 | 176 0 34343 34470 177 | 177 0 34471 34566 178 | 178 0 34567 34647 179 | 179 0 34648 34773 180 | 180 0 34774 34848 181 | 181 0 34849 35049 182 | 182 0 35050 35248 183 | 183 0 35249 35310 184 | 184 0 35311 35496 185 | 185 0 35497 35639 186 | 186 0 35640 35705 187 | 187 0 35706 35779 188 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC6247067.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 EXPERIMENT:current_exp 5 68 77 2 | SPAN 2 DEVICE 5 92 99 3 | SPAN 3 VALUE 5 100 112 4 | SPAN 4 VALUE 6 3 9 5 | SPAN 5 EXPERIMENT:current_exp 6 44 51 6 | SPAN 6 VALUE 6 52 63 7 | SPAN 7 VALUE 6 67 72 8 | SPAN 8 EXPERIMENT:current_exp 7 72 80 9 | SPAN 9 VALUE 7 126 131 10 | SPAN 10 VALUE 7 135 141 11 | SPAN 11 VALUE 7 151 155 12 | SPAN 12 MATERIAL 28 574 580 13 | SPAN 13 EXPERIMENT:general_info 28 591 595 14 | SPAN 14 MATERIAL 28 603 611 15 | SPAN 15 DEVICE 28 626 631 16 | SPAN 16 DEVICE 28 646 653 17 | SPAN 17 DEVICE 28 771 778 18 | SPAN 18 MATERIAL 28 958 967 19 | SPAN 19 MATERIAL 28 968 977 20 | SPAN 20 MATERIAL 28 982 991 21 | SPAN 21 EXPERIMENT:general_info 28 1002 1009 22 | SPAN 22 DEVICE 28 1038 1043 23 | SPAN 23 VALUE 28 1068 1078 24 | SPAN 24 MATERIAL 29 39 45 25 | SPAN 25 MATERIAL 29 50 56 26 | SPAN 26 MATERIAL 29 58 62 27 | SPAN 27 EXPERIMENT:general_info 29 64 69 28 | SPAN 28 MATERIAL 29 218 222 29 | SPAN 29 EXPERIMENT:previous_work 29 272 281 30 | SPAN 30 MATERIAL 29 308 335 31 | SPAN 31 MATERIAL 29 337 343 32 | SPAN 32 DEVICE 29 349 354 33 | SPAN 33 EXPERIMENT:previous_work 29 517 525 34 | SPAN 34 DEVICE 29 537 541 35 | SPAN 35 MATERIAL 29 548 554 36 | SPAN 36 MATERIAL 29 559 586 37 | SPAN 37 MATERIAL 29 588 593 38 | SPAN 38 
EXPERIMENT:previous_work 39 168 176 39 | SPAN 39 DEVICE 39 222 227 40 | SPAN 40 EXPERIMENT:previous_work 39 229 239 41 | SPAN 41 VALUE 39 282 288 42 | SPAN 42 MATERIAL 46 81 106 43 | SPAN 43 MATERIAL 46 108 112 44 | SPAN 44 MATERIAL 46 117 129 45 | SPAN 45 MATERIAL 46 131 135 46 | SPAN 46 EXPERIMENT:previous_work 47 12 20 47 | SPAN 47 MATERIAL 47 37 63 48 | SPAN 48 MATERIAL 47 65 70 49 | SPAN 49 EXPERIMENT:previous_work 47 96 103 50 | SPAN 50 DEVICE 47 107 111 51 | SPAN 51 VALUE 47 146 157 52 | SPAN 52 VALUE 47 161 167 53 | SPAN 53 EXPERIMENT:previous_work 47 191 199 54 | SPAN 54 MATERIAL 47 218 245 55 | SPAN 55 MATERIAL 47 247 252 56 | SPAN 56 EXPERIMENT:previous_work 47 277 289 57 | SPAN 57 DEVICE 47 300 304 58 | SPAN 58 EXPERIMENT:current_exp 48 261 270 59 | SPAN 59 EXPERIMENT:current_exp 48 336 346 60 | SPAN 60 MATERIAL 48 357 362 61 | SPAN 61 DEVICE 50 4 8 62 | SPAN 62 EXPERIMENT:current_exp 50 9 19 63 | SPAN 63 MATERIAL 50 30 35 64 | SPAN 64 MATERIAL 50 61 67 65 | SPAN 65 MATERIAL 50 85 94 66 | SPAN 66 EXPERIMENT:current_exp 50 118 130 67 | SPAN 67 VALUE 50 189 201 68 | SPAN 68 EXPERIMENT:current_exp 51 31 40 69 | SPAN 69 MATERIAL 51 44 54 70 | SPAN 70 MATERIAL 51 95 107 71 | EXPERIMENT 1 1 72 | device 2 73 | working_temperature 3 74 | EXPERIMENT 2 5 75 | working_temperature 4 76 | current_density 6 77 | voltage 7 78 | EXPERIMENT 3 8 79 | voltage 9 80 | working_temperature 10 81 | time_of_operation 11 82 | EXPERIMENT 4 13 83 | anode_material 12 84 | fuel_used 14 85 | device 15 86 | EXPERIMENT 5 21 87 | electrolyte_material 18 88 | electrolyte_material 19 89 | electrolyte_material 20 90 | device 22 91 | working_temperature 23 92 | EXPERIMENT 6 27 93 | electrolyte_material 24 94 | electrolyte_material 25 95 | EXPERIMENT 7 29 96 | anode_material 30 97 | device 32 98 | EXPERIMENT 8 33 99 | device 34 100 | electrolyte_material 35 101 | cathode_material 36 102 | EXPERIMENT 9 38 103 | device 39 104 | EXPERIMENT 10 40 105 | working_temperature 41 106 | EXPERIMENT 11 
46 107 | cathode_material 47 108 | EXPERIMENT 12 49 109 | device 50 110 | power_density 51 111 | working_temperature 52 112 | EXPERIMENT 13 53 113 | cathode_material 54 114 | EXPERIMENT 14 56 115 | device 57 116 | EXPERIMENT 15 59 117 | anode_material 60 118 | EXPERIMENT 16 62 119 | device 61 120 | cathode_material 63 121 | electrolyte_material 64 122 | cathode_material 65 123 | EXPERIMENT 17 66 124 | working_temperature 67 125 | EXPERIMENT 18 68 126 | fuel_used 69 127 | fuel_used 70 128 | LINK experiment_variation 5 1 129 | LINK experiment_variation 27 21 130 | LINK same_experiment 38 40 131 | LINK same_experiment 46 49 132 | LINK same_experiment 53 56 133 | LINK same_experiment 58 59 134 | LINK same_experiment 62 66 135 | LINK coreference 25 26 136 | LINK coreference 30 31 137 | LINK coreference 36 37 138 | LINK coreference 42 43 139 | LINK coreference 44 45 140 | LINK coreference 47 48 141 | LINK coreference 54 55 142 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5457058.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 585 2 | 2 0 586 1044 3 | 3 1 1045 1167 4 | 4 0 1168 1318 5 | 5 0 1319 1581 6 | 6 0 1582 1699 7 | 7 1 1700 1830 8 | 8 1 1831 2014 9 | 9 1 2015 2232 10 | 10 0 2233 2360 11 | 11 0 2361 2415 12 | 12 0 2416 2610 13 | 13 0 2611 2755 14 | 14 0 2756 2857 15 | 15 0 2858 3202 16 | 16 0 3203 3481 17 | 17 0 3482 3603 18 | 18 0 3604 3771 19 | 19 0 3772 3902 20 | 20 0 3903 4228 21 | 21 0 4229 4463 22 | 22 1 4464 4748 23 | 23 0 4749 4965 24 | 24 0 4966 5189 25 | 25 0 5190 5290 26 | 26 0 5291 5467 27 | 27 0 5468 5503 28 | 28 0 5504 5594 29 | 29 0 5595 5728 30 | 30 0 5729 6111 31 | 31 0 6112 6328 32 | 32 0 6329 6587 33 | 33 0 6588 6720 34 | 34 0 6721 6787 35 | 35 0 6788 6831 36 | 36 0 6832 6935 37 | 37 0 6936 7005 38 | 38 0 7006 7313 39 | 39 0 7314 7381 40 | 40 0 7382 7483 41 | 41 0 7484 7603 42 | 42 0 7604 7759 43 | 43 0 7760 7897 44 | 44 
0 7898 8071 45 | 45 0 8072 8222 46 | 46 0 8223 8339 47 | 47 0 8340 8538 48 | 48 0 8539 8630 49 | 49 0 8631 8694 50 | 50 0 8695 8878 51 | 51 0 8879 8942 52 | 52 0 8943 8986 53 | 53 0 8987 9165 54 | 54 0 9166 9240 55 | 55 0 9241 9357 56 | 56 0 9358 9483 57 | 57 0 9484 9636 58 | 58 0 9637 9848 59 | 59 0 9849 9986 60 | 60 0 9987 10149 61 | 61 0 10150 10251 62 | 62 0 10252 10370 63 | 63 0 10371 10506 64 | 64 1 10507 10753 65 | 65 1 10754 10947 66 | 66 0 10948 11081 67 | 67 0 11082 11211 68 | 68 0 11212 11349 69 | 69 1 11350 11492 70 | 70 0 11493 11569 71 | 71 0 11570 11759 72 | 72 0 11760 11852 73 | 73 0 11853 11953 74 | 74 0 11954 12021 75 | 75 0 12022 12167 76 | 76 1 12168 12322 77 | 77 0 12323 12535 78 | 78 0 12536 12653 79 | 79 0 12654 12681 80 | 80 0 12682 12835 81 | 81 0 12836 12869 82 | 82 0 12870 13049 83 | 83 0 13050 13148 84 | 84 0 13149 13418 85 | 85 0 13419 13516 86 | 86 0 13517 13607 87 | 87 0 13608 13764 88 | 88 0 13765 13920 89 | 89 0 13921 14154 90 | 90 0 14155 14285 91 | 91 0 14286 14362 92 | 92 0 14363 14413 93 | 93 0 14414 14641 94 | 94 0 14642 14707 95 | 95 0 14708 14778 96 | 96 0 14779 14944 97 | 97 0 14945 15258 98 | 98 0 15259 15520 99 | 99 0 15521 15633 100 | 100 0 15634 15957 101 | 101 0 15958 16183 102 | 102 0 16184 16402 103 | 103 0 16403 16493 104 | 104 0 16494 16879 105 | 105 0 16880 16987 106 | 106 0 16988 17105 107 | 107 0 17106 17175 108 | 108 0 17176 17310 109 | 109 0 17311 17450 110 | 110 0 17451 17527 111 | 111 0 17528 17716 112 | 112 0 17717 17837 113 | 113 0 17838 18021 114 | 114 0 18022 18288 115 | 115 0 18289 18657 116 | 116 0 18658 18867 117 | 117 0 18868 19022 118 | 118 0 19023 19201 119 | 119 0 19202 19407 120 | 120 0 19408 19551 121 | 121 0 19552 19694 122 | 122 0 19695 19862 123 | 123 0 19863 19915 124 | 124 0 19916 20030 125 | 125 0 20031 20139 126 | 126 0 20140 20175 127 | 127 0 20176 20293 128 | 128 0 20294 20362 129 | 129 0 20363 20503 130 | 130 0 20504 20705 131 | 131 0 20706 20781 132 | 132 0 20782 20809 133 | 133 0 
20810 21061 134 | 134 0 21062 21265 135 | 135 0 21266 21485 136 | 136 0 21486 21669 137 | 137 0 21670 21740 138 | 138 0 21741 21938 139 | 139 0 21939 22172 140 | 140 0 22173 22510 141 | 141 0 22511 22742 142 | 142 0 22743 22865 143 | 143 0 22866 23176 144 | 144 1 23177 23374 145 | 145 1 23375 23472 146 | 146 0 23473 23699 147 | 147 0 23700 23779 148 | 148 0 23780 23883 149 | 149 0 23884 23957 150 | 150 0 23958 24059 151 | 151 0 24060 24186 152 | 152 0 24187 24274 153 | 153 0 24275 24347 154 | 154 0 24348 24776 155 | 155 0 24777 24858 156 | 156 0 24859 24922 157 | 157 0 24923 25088 158 | 158 0 25089 25209 159 | 159 0 25210 25291 160 | 160 0 25292 25356 161 | 161 0 25357 25436 162 | 162 0 25437 25510 163 | 163 0 25511 25610 164 | 164 0 25611 25807 165 | 165 0 25808 25947 166 | 166 0 25948 26093 167 | 167 0 26094 26216 168 | 168 0 26217 26441 169 | 169 0 26442 26725 170 | 170 1 26726 26804 171 | 171 0 26805 26853 172 | 172 0 26854 27088 173 | 173 1 27089 27242 174 | 174 0 27243 27377 175 | 175 0 27378 27579 176 | 176 0 27580 27796 177 | 177 0 27797 27935 178 | 178 0 27936 28026 179 | 179 0 28027 28142 180 | 180 1 28143 28354 181 | 181 1 28355 28493 182 | 182 0 28494 28552 183 | 183 1 28553 28667 184 | 184 0 28668 28810 185 | 185 0 28811 29132 186 | 186 0 29133 29602 187 | 187 0 29603 29763 188 | 188 1 29764 30107 189 | 189 1 30108 30339 190 | 190 1 30340 30474 191 | 191 0 30475 30676 192 | 192 0 30677 30831 193 | 193 0 30832 30971 194 | 194 0 30972 31096 195 | 195 1 31097 31299 196 | 196 0 31300 31534 197 | 197 1 31535 31678 198 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6517467.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 1021 2 | 2 0 1022 1286 3 | 3 1 1287 1589 4 | 4 1 1590 1675 5 | 5 0 1676 1805 6 | 6 0 1806 1962 7 | 7 0 1963 2064 8 | 8 0 2065 2234 9 | 9 0 2235 2648 10 | 10 0 2649 2934 11 | 11 0 2935 3107 12 | 12 1 3108 3267 13 | 13 0 
3268 3393 14 | 14 0 3394 3622 15 | 15 0 3623 3756 16 | 16 0 3757 3910 17 | 17 0 3911 4201 18 | 18 0 4202 4357 19 | 19 0 4358 4542 20 | 20 0 4543 4662 21 | 21 1 4663 4911 22 | 22 0 4912 5028 23 | 23 0 5029 5188 24 | 24 0 5189 5344 25 | 25 0 5345 5566 26 | 26 0 5567 5697 27 | 27 0 5698 5877 28 | 28 0 5878 5975 29 | 29 0 5976 6090 30 | 30 0 6091 6206 31 | 31 0 6207 6336 32 | 32 0 6337 6501 33 | 33 0 6502 6750 34 | 34 0 6751 6900 35 | 35 0 6901 7079 36 | 36 0 7080 7221 37 | 37 0 7222 7305 38 | 38 0 7306 7396 39 | 39 0 7397 7494 40 | 40 0 7495 7595 41 | 41 0 7596 7686 42 | 42 0 7687 7850 43 | 43 0 7851 8077 44 | 44 0 8078 8260 45 | 45 0 8261 8492 46 | 46 0 8493 8578 47 | 47 0 8579 8711 48 | 48 0 8712 8786 49 | 49 0 8787 8920 50 | 50 0 8921 9065 51 | 51 0 9066 9323 52 | 52 0 9324 9531 53 | 53 0 9532 9583 54 | 54 0 9584 10178 55 | 55 0 10179 10274 56 | 56 0 10275 10573 57 | 57 0 10574 10695 58 | 58 0 10696 10800 59 | 59 0 10801 10989 60 | 60 0 10990 11322 61 | 61 0 11323 11600 62 | 62 0 11601 11839 63 | 63 0 11840 11955 64 | 64 0 11956 12203 65 | 65 0 12204 12394 66 | 66 0 12395 12700 67 | 67 0 12701 12870 68 | 68 0 12871 13061 69 | 69 0 13062 13147 70 | 70 0 13148 13366 71 | 71 0 13367 13489 72 | 72 0 13490 13617 73 | 73 0 13618 13799 74 | 74 0 13800 13920 75 | 75 0 13921 14020 76 | 76 0 14021 14147 77 | 77 0 14148 14304 78 | 78 0 14305 14497 79 | 79 0 14498 14705 80 | 80 0 14706 14839 81 | 81 0 14840 15076 82 | 82 0 15077 15379 83 | 83 0 15380 15449 84 | 84 0 15450 15684 85 | 85 0 15685 15988 86 | 86 0 15989 16195 87 | 87 0 16196 16338 88 | 88 0 16339 16461 89 | 89 0 16462 16631 90 | 90 0 16632 16724 91 | 91 0 16725 16926 92 | 92 1 16927 17045 93 | 93 1 17046 17128 94 | 94 0 17129 17259 95 | 95 1 17260 17438 96 | 96 1 17439 17594 97 | 97 0 17595 17804 98 | 98 0 17805 18003 99 | 99 0 18004 18212 100 | 100 0 18213 18261 101 | 101 0 18262 18405 102 | 102 1 18406 18608 103 | 103 0 18609 18786 104 | 104 0 18787 18996 105 | 105 0 18997 19201 106 | 106 0 19202 19364 107 | 107 
0 19365 19635 108 | 108 0 19636 19868 109 | 109 1 19869 20064 110 | 110 0 20065 20367 111 | 111 0 20368 20502 112 | 112 0 20503 20625 113 | 113 0 20626 20841 114 | 114 0 20842 20978 115 | 115 0 20979 21437 116 | 116 0 21438 21539 117 | 117 0 21540 21655 118 | 118 0 21656 21828 119 | 119 0 21829 21975 120 | 120 0 21976 22218 121 | 121 1 22219 22333 122 | 122 0 22334 22534 123 | 123 0 22535 22683 124 | 124 1 22684 22919 125 | 125 1 22920 23021 126 | 126 0 23022 23255 127 | 127 0 23256 23314 128 | 128 0 23315 23373 129 | 129 0 23374 23588 130 | 130 0 23589 23658 131 | 131 0 23659 24012 132 | 132 0 24013 24530 133 | 133 0 24531 24855 134 | 134 0 24856 24893 135 | 135 0 24894 25028 136 | 136 0 25029 25071 137 | 137 0 25072 25387 138 | 138 0 25388 25599 139 | 139 0 25600 25776 140 | 140 0 25777 25927 141 | 141 0 25928 26148 142 | 142 0 26149 26242 143 | 143 0 26243 26835 144 | 144 0 26836 27110 145 | 145 0 27111 27617 146 | 146 0 27618 27697 147 | 147 0 27698 27861 148 | 148 0 27862 27998 149 | 149 0 27999 28301 150 | 150 0 28302 28437 151 | 151 0 28438 28705 152 | 152 0 28706 28888 153 | 153 0 28889 29147 154 | 154 0 29148 29233 155 | 155 0 29234 29360 156 | 156 1 29361 29529 157 | 157 0 29530 29761 158 | 158 0 29762 29908 159 | 159 0 29909 30035 160 | 160 0 30036 30201 161 | 161 0 30202 30554 162 | 162 0 30555 30812 163 | 163 0 30813 30951 164 | 164 0 30952 31105 165 | 165 0 31106 31259 166 | 166 0 31260 31377 167 | 167 0 31378 31515 168 | 168 0 31516 31636 169 | 169 0 31637 31843 170 | 170 0 31844 31936 171 | 171 0 31937 32054 172 | 172 0 32055 32159 173 | 173 0 32160 32353 174 | 174 1 32354 32572 175 | 175 1 32573 32786 176 | 176 0 32787 32954 177 | 177 0 32955 33124 178 | 178 0 33125 33225 179 | 179 0 33226 33374 180 | 180 0 33375 33420 181 | 181 0 33421 33477 182 | 182 0 33478 33488 183 | 183 0 33489 33545 184 | 184 0 33546 33556 185 | 185 0 33557 33613 186 | 186 0 33614 33624 187 | 187 0 33625 33675 188 | 188 0 33676 33686 189 | 189 0 33687 33752 190 | 190 0 33753 
34428 191 | 191 0 34429 34989 192 | 192 0 34990 35073 193 | 193 0 35074 35125 194 | 194 0 35126 35178 195 | 195 0 35179 35242 196 | 196 0 35243 35321 197 | 197 0 35322 35457 198 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC5456601.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 DEVICE 11 39 61 2 | SPAN 2 DEVICE 11 63 68 3 | SPAN 3 EXPERIMENT:general_info 11 71 76 4 | SPAN 4 MATERIAL 11 110 117 5 | SPAN 5 EXPERIMENT:general_info 11 132 139 6 | SPAN 6 VALUE 11 162 179 7 | SPAN 7 EXPERIMENT:current_exp 11 221 224 8 | SPAN 8 MATERIAL 11 238 249 9 | SPAN 9 MATERIAL 16 21 41 10 | SPAN 10 MATERIAL 16 43 46 11 | SPAN 11 DEVICE 16 71 75 12 | SPAN 12 DEVICE 17 49 54 13 | SPAN 13 EXPERIMENT:general_info 17 55 63 14 | SPAN 14 VALUE 17 67 74 15 | SPAN 15 MATERIAL 58 187 193 16 | SPAN 16 MATERIAL 58 195 201 17 | SPAN 17 MATERIAL 58 206 212 18 | SPAN 18 MATERIAL 59 41 55 19 | SPAN 19 MATERIAL 59 59 73 20 | SPAN 20 EXPERIMENT:previous_work 61 20 28 21 | SPAN 21 EXPERIMENT:previous_work 61 107 114 22 | SPAN 22 MATERIAL 61 120 144 23 | SPAN 23 MATERIAL 61 146 150 24 | SPAN 24 EXPERIMENT:previous_work 61 152 157 25 | SPAN 25 MATERIAL 61 216 240 26 | SPAN 26 MATERIAL 61 242 246 27 | SPAN 27 EXPERIMENT:general_info 61 264 268 28 | SPAN 28 DEVICE 61 298 302 29 | SPAN 29 MATERIAL 63 54 70 30 | SPAN 30 EXPERIMENT:general_info 64 38 42 31 | SPAN 31 DEVICE 64 47 51 32 | SPAN 32 EXPERIMENT:general_info 64 67 74 33 | SPAN 33 MATERIAL 64 75 99 34 | SPAN 34 MATERIAL 64 101 105 35 | SPAN 35 MATERIAL 66 97 128 36 | SPAN 36 MATERIAL 66 144 171 37 | SPAN 37 EXPERIMENT:previous_work 68 125 133 38 | SPAN 38 MATERIAL 68 159 171 39 | SPAN 39 MATERIAL 68 181 192 40 | SPAN 40 MATERIAL 69 79 82 41 | SPAN 41 EXPERIMENT:previous_work 69 93 105 42 | SPAN 42 MATERIAL 72 41 54 43 | SPAN 43 MATERIAL 72 59 73 44 | SPAN 44 DEVICE 74 49 65 45 | SPAN 45 DEVICE 75 8 12 46 | SPAN 46 MATERIAL 
75 16 21 47 | SPAN 47 MATERIAL 75 48 61 48 | SPAN 48 MATERIAL 75 76 98 49 | SPAN 49 MATERIAL 75 100 104 50 | SPAN 50 VALUE 75 125 131 51 | SPAN 51 MATERIAL 75 155 169 52 | SPAN 52 EXPERIMENT:current_exp 75 185 189 53 | SPAN 53 EXPERIMENT:current_exp 76 144 146 54 | SPAN 54 VALUE 76 177 183 55 | SPAN 55 MATERIAL 77 31 35 56 | SPAN 56 VALUE 77 84 91 57 | SPAN 57 VALUE 77 95 100 58 | SPAN 58 EXPERIMENT:current_exp 77 104 112 59 | SPAN 59 MATERIAL 95 250 256 60 | SPAN 60 MATERIAL 95 261 267 61 | SPAN 61 MATERIAL 100 30 44 62 | SPAN 62 MATERIAL 101 29 51 63 | SPAN 63 MATERIAL 176 291 329 64 | SPAN 64 MATERIAL 193 125 141 65 | SPAN 65 MATERIAL 194 38 62 66 | SPAN 66 MATERIAL 194 64 68 67 | SPAN 67 EXPERIMENT:general_info 194 77 82 68 | SPAN 68 EXPERIMENT:general_info 194 208 212 69 | SPAN 69 DEVICE 194 233 263 70 | SPAN 70 MATERIAL 198 30 42 71 | SPAN 71 MATERIAL 214 110 124 72 | SPAN 72 MATERIAL 214 252 261 73 | SPAN 73 MATERIAL 215 157 180 74 | SPAN 74 MATERIAL 215 182 186 75 | SPAN 75 MATERIAL 215 192 205 76 | SPAN 76 MATERIAL 215 207 210 77 | SPAN 77 MATERIAL 223 75 79 78 | SPAN 78 MATERIAL 235 51 62 79 | SPAN 79 MATERIAL 235 64 68 80 | SPAN 80 MATERIAL 235 74 83 81 | SPAN 81 MATERIAL 235 85 88 82 | EXPERIMENT 1 3 83 | device 1 84 | electrolyte_material 4 85 | EXPERIMENT 2 5 86 | working_temperature 6 87 | EXPERIMENT 3 7 88 | fuel_used 8 89 | EXPERIMENT 4 13 90 | device 12 91 | working_temperature 14 92 | EXPERIMENT 5 24 93 | cathode_material 22 94 | EXPERIMENT 6 27 95 | cathode_material 25 96 | device 28 97 | EXPERIMENT 7 30 98 | device 31 99 | EXPERIMENT 8 32 100 | cathode_material 33 101 | EXPERIMENT 9 37 102 | cathode_material 38 103 | cathode_material 39 104 | EXPERIMENT 10 41 105 | cathode_material 40 106 | EXPERIMENT 11 52 107 | device 45 108 | support_material 46 109 | interlayer_material 47 110 | electrolyte_material 48 111 | cathode_material 51 112 | EXPERIMENT 12 53 113 | open_circuit_voltage 54 114 | EXPERIMENT 13 58 115 | electrolyte_material 55 116 | 
power_density 56 117 | working_temperature 57 118 | EXPERIMENT 14 67 119 | cathode_material 65 120 | EXPERIMENT 15 68 121 | device 69 122 | LINK same_experiment 3 5 123 | LINK same_experiment 5 7 124 | LINK same_experiment 20 21 125 | LINK experiment_variation 27 24 126 | LINK same_experiment 21 24 127 | LINK same_experiment 30 32 128 | LINK thickness 48 50 129 | LINK same_experiment 52 53 130 | LINK same_experiment 53 58 131 | LINK same_experiment 67 68 132 | LINK coreference 1 2 133 | LINK coreference 9 10 134 | LINK coreference 22 23 135 | LINK coreference 25 26 136 | LINK coreference 33 34 137 | LINK coreference 44 45 138 | LINK coreference 48 49 139 | LINK coreference 65 66 140 | LINK coreference 73 74 141 | LINK coreference 75 76 142 | LINK coreference 78 79 143 | LINK coreference 80 81 144 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC6249295.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 MATERIAL 4 32 58 2 | SPAN 2 MATERIAL 4 60 65 3 | SPAN 3 EXPERIMENT:current_exp 4 71 75 4 | SPAN 4 DEVICE 4 112 116 5 | SPAN 5 DEVICE 5 41 45 6 | SPAN 6 EXPERIMENT:current_exp 5 46 51 7 | SPAN 7 VALUE 5 93 104 8 | SPAN 8 VALUE 5 108 114 9 | SPAN 9 VALUE 5 138 143 10 | SPAN 10 VALUE 6 56 68 11 | SPAN 11 VALUE 6 89 101 12 | SPAN 12 EXPERIMENT:current_exp 6 139 147 13 | SPAN 13 VALUE 6 169 174 14 | SPAN 14 VALUE 6 178 184 15 | SPAN 15 DEVICE 35 69 73 16 | SPAN 16 EXPERIMENT:current_exp 35 79 89 17 | SPAN 17 MATERIAL 35 142 150 18 | SPAN 18 MATERIAL 35 173 199 19 | SPAN 19 MATERIAL 35 201 206 20 | SPAN 20 MATERIAL 35 208 212 21 | SPAN 21 DEVICE 71 22 26 22 | SPAN 22 EXPERIMENT:current_exp 71 31 39 23 | SPAN 23 VALUE 71 97 102 24 | SPAN 24 VALUE 71 104 109 25 | SPAN 25 VALUE 71 115 126 26 | SPAN 26 VALUE 71 130 133 27 | SPAN 27 VALUE 71 135 138 28 | SPAN 28 VALUE 71 140 146 29 | SPAN 29 EXPERIMENT:current_exp 72 63 73 30 | SPAN 30 VALUE 72 142 147 31 | 
SPAN 31 VALUE 72 149 154 32 | SPAN 32 VALUE 72 160 171 33 | SPAN 33 VALUE 72 175 178 34 | SPAN 34 VALUE 72 180 183 35 | SPAN 35 VALUE 72 185 191 36 | SPAN 36 MATERIAL 76 30 40 37 | SPAN 37 EXPERIMENT:current_exp 76 41 45 38 | SPAN 38 VALUE 76 46 51 39 | SPAN 39 VALUE 76 53 58 40 | SPAN 40 VALUE 76 64 75 41 | SPAN 41 VALUE 76 79 82 42 | SPAN 42 VALUE 76 84 87 43 | SPAN 43 VALUE 76 93 99 44 | SPAN 44 EXPERIMENT:current_exp 81 10 15 45 | SPAN 45 DEVICE 81 99 103 46 | SPAN 46 MATERIAL 81 120 122 47 | SPAN 47 MATERIAL 81 135 138 48 | SPAN 48 EXPERIMENT:current_exp 82 68 76 49 | SPAN 49 VALUE 82 118 129 50 | SPAN 50 VALUE 82 133 139 51 | SPAN 51 DEVICE 82 234 238 52 | SPAN 52 MATERIAL 83 61 66 53 | SPAN 53 DEVICE 87 59 63 54 | SPAN 54 EXPERIMENT:current_exp 87 64 69 55 | SPAN 55 VALUE 87 116 128 56 | SPAN 56 VALUE 87 149 161 57 | SPAN 57 VALUE 87 215 220 58 | SPAN 58 VALUE 87 224 230 59 | SPAN 59 EXPERIMENT:current_exp 91 66 74 60 | SPAN 60 DEVICE 91 81 85 61 | SPAN 61 VALUE 91 150 156 62 | SPAN 62 MATERIAL 96 168 194 63 | SPAN 63 MATERIAL 96 196 201 64 | SPAN 64 MATERIAL 97 4 9 65 | SPAN 65 MATERIAL 97 40 66 66 | SPAN 66 MATERIAL 97 68 75 67 | SPAN 67 EXPERIMENT:current_exp 98 26 34 68 | SPAN 68 DEVICE 98 42 46 69 | SPAN 69 DEVICE 98 51 55 70 | SPAN 70 DEVICE 99 3 7 71 | SPAN 71 DEVICE 99 32 36 72 | SPAN 72 EXPERIMENT:current_exp 99 37 42 73 | SPAN 73 VALUE 99 73 84 74 | SPAN 74 VALUE 99 88 94 75 | SPAN 75 VALUE 99 164 169 76 | SPAN 76 DEVICE 100 3 7 77 | SPAN 77 EXPERIMENT:current_exp 100 36 41 78 | SPAN 78 VALUE 100 84 96 79 | SPAN 79 VALUE 100 131 136 80 | SPAN 80 VALUE 100 140 146 81 | EXPERIMENT 1 3 82 | cathode_material 1 83 | device 4 84 | EXPERIMENT 2 6 85 | device 5 86 | power_density 7 87 | working_temperature 8 88 | time_of_operation 9 89 | EXPERIMENT 3 12 90 | current_density 10 91 | current_density 11 92 | voltage 13 93 | working_temperature 14 94 | EXPERIMENT 4 16 95 | device 15 96 | fuel_used 17 97 | cathode_material 18 98 | cathode_material 20 99 | 
EXPERIMENT 5 22 100 | device 21 101 | resistance 23 102 | resistance 24 103 | resistance 25 104 | working_temperature 26 105 | working_temperature 27 106 | working_temperature 28 107 | EXPERIMENT 6 29 108 | resistance 30 109 | resistance 31 110 | resistance 32 111 | working_temperature 33 112 | working_temperature 34 113 | working_temperature 35 114 | EXPERIMENT 7 37 115 | cathode_material 36 116 | resistance 38 117 | resistance 39 118 | resistance 40 119 | working_temperature 41 120 | working_temperature 42 121 | working_temperature 43 122 | EXPERIMENT 8 44 123 | device 45 124 | fuel_used 46 125 | fuel_used 47 126 | EXPERIMENT 9 48 127 | power_density 49 128 | working_temperature 50 129 | device 51 130 | EXPERIMENT 10 54 131 | device 53 132 | current_density 55 133 | current_density 56 134 | voltage 57 135 | working_temperature 58 136 | EXPERIMENT 11 59 137 | device 60 138 | time_of_operation 61 139 | EXPERIMENT 12 67 140 | device 68 141 | device 69 142 | EXPERIMENT 13 72 143 | device 70 144 | device 71 145 | power_density 73 146 | working_temperature 74 147 | time_of_operation 75 148 | EXPERIMENT 14 77 149 | device 76 150 | current_density 78 151 | voltage 79 152 | working_temperature 80 153 | LINK experiment_variation 6 3 154 | LINK experiment_variation 12 6 155 | LINK same_experiment 44 48 156 | LINK experiment_variation 72 67 157 | LINK experiment_variation 77 72 158 | LINK coreference 1 2 159 | LINK coreference 18 19 160 | LINK coreference 62 63 161 | LINK coreference 65 66 162 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC6427619.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 MATERIAL 2 65 90 2 | SPAN 2 DEVICE 3 44 53 3 | SPAN 3 EXPERIMENT:current_exp 3 95 107 4 | SPAN 4 MATERIAL 5 46 71 5 | SPAN 5 EXPERIMENT:current_exp 5 72 75 6 | SPAN 6 VALUE 5 76 87 7 | SPAN 7 VALUE 5 92 109 8 | SPAN 8 VALUE 5 113 119 9 | SPAN 9 
EXPERIMENT:previous_work 19 12 20 10 | SPAN 10 MATERIAL 19 58 90 11 | SPAN 11 MATERIAL 19 134 151 12 | SPAN 12 VALUE 19 152 164 13 | SPAN 13 EXPERIMENT:previous_work 20 13 18 14 | SPAN 14 MATERIAL 20 41 75 15 | SPAN 15 VALUE 20 98 109 16 | SPAN 16 VALUE 20 113 119 17 | SPAN 17 MATERIAL 46 9 11 18 | SPAN 18 MATERIAL 46 12 14 19 | SPAN 19 DEVICE 46 15 25 20 | SPAN 20 EXPERIMENT:current_exp 46 46 52 21 | SPAN 21 MATERIAL 67 22 47 22 | SPAN 22 EXPERIMENT:current_exp 67 48 52 23 | SPAN 23 VALUE 67 53 96 24 | SPAN 24 VALUE 67 113 123 25 | SPAN 25 EXPERIMENT:current_exp 67 130 132 26 | SPAN 26 MATERIAL 67 147 177 27 | SPAN 27 MATERIAL 67 185 188 28 | SPAN 28 VALUE 67 192 198 29 | SPAN 29 EXPERIMENT:current_exp 75 9 17 30 | SPAN 30 MATERIAL 75 70 86 31 | SPAN 31 MATERIAL 75 101 126 32 | SPAN 32 VALUE 75 160 166 33 | SPAN 33 MATERIAL 79 50 66 34 | SPAN 34 MATERIAL 79 81 106 35 | SPAN 35 EXPERIMENT:current_exp 79 107 110 36 | SPAN 36 VALUE 79 111 121 37 | SPAN 37 VALUE 79 126 136 38 | SPAN 38 MATERIAL 82 175 177 39 | SPAN 39 MATERIAL 82 178 180 40 | SPAN 40 DEVICE 82 181 190 41 | SPAN 41 EXPERIMENT:current_exp 82 191 196 42 | SPAN 42 MATERIAL 82 197 222 43 | SPAN 43 VALUE 82 236 242 44 | SPAN 44 EXPERIMENT:current_exp 82 259 267 45 | SPAN 45 VALUE 82 302 313 46 | SPAN 46 EXPERIMENT:current_exp 82 331 333 47 | SPAN 47 VALUE 82 334 340 48 | SPAN 48 VALUE 82 344 350 49 | SPAN 49 MATERIAL 83 4 25 50 | SPAN 50 EXPERIMENT:current_exp 83 31 34 51 | SPAN 51 VALUE 83 35 47 52 | SPAN 52 EXPERIMENT:current_exp 84 27 29 53 | SPAN 53 DEVICE 84 46 55 54 | SPAN 54 MATERIAL 84 78 92 55 | SPAN 55 MATERIAL 84 101 111 56 | SPAN 56 VALUE 84 113 119 57 | SPAN 57 MATERIAL 84 125 152 58 | SPAN 58 VALUE 84 166 171 59 | SPAN 59 VALUE 84 173 179 60 | SPAN 60 MATERIAL 84 209 229 61 | SPAN 61 MATERIAL 84 237 248 62 | SPAN 62 VALUE 84 262 268 63 | SPAN 63 VALUE 84 270 276 64 | SPAN 64 MATERIAL 89 38 54 65 | SPAN 65 MATERIAL 89 69 94 66 | SPAN 66 EXPERIMENT:current_exp 89 95 99 67 | SPAN 67 VALUE 89 100 
110 68 | SPAN 68 VALUE 89 115 125 69 | SPAN 69 VALUE 89 159 165 70 | SPAN 70 MATERIAL 90 46 71 71 | SPAN 71 EXPERIMENT:current_exp 90 72 76 72 | SPAN 72 VALUE 90 77 88 73 | SPAN 73 VALUE 90 93 110 74 | SPAN 74 VALUE 90 114 120 75 | EXPERIMENT 1 3 76 | electrolyte_material 1 77 | device 2 78 | EXPERIMENT 2 5 79 | electrolyte_material 4 80 | power_density 6 81 | conductivity 7 82 | working_temperature 8 83 | EXPERIMENT 3 9 84 | electrolyte_material 10 85 | electrolyte_material 11 86 | working_temperature 12 87 | EXPERIMENT 4 13 88 | electrolyte_material 14 89 | conductivity 15 90 | working_temperature 16 91 | EXPERIMENT 5 20 92 | fuel_used 17 93 | fuel_used 18 94 | device 19 95 | EXPERIMENT 6 22 96 | electrolyte_material 21 97 | conductivity 23 98 | working_temperature 24 99 | EXPERIMENT 7 25 100 | electrolyte_material 26 101 | fuel_used 27 102 | working_temperature 28 103 | EXPERIMENT 8 29 104 | electrolyte_material 30 105 | electrolyte_material 31 106 | working_temperature 32 107 | EXPERIMENT 9 35 108 | electrolyte_material 33 109 | electrolyte_material 34 110 | resistance 36 111 | resistance 37 112 | EXPERIMENT 10 41 113 | fuel_used 38 114 | fuel_used 39 115 | device 40 116 | electrolyte_material 42 117 | EXPERIMENT 11 44 118 | power_density 45 119 | EXPERIMENT 12 46 120 | voltage 47 121 | working_temperature 48 122 | EXPERIMENT 13 50 123 | electrolyte_material 49 124 | power_density 51 125 | EXPERIMENT 14 52 126 | device 53 127 | electrolyte_material 54 128 | electrolyte_material 55 129 | working_temperature 56 130 | electrolyte_material 57 131 | working_temperature 59 132 | electrolyte_material 60 133 | working_temperature 63 134 | EXPERIMENT 15 66 135 | electrolyte_material 64 136 | electrolyte_material 65 137 | resistance 67 138 | resistance 68 139 | working_temperature 69 140 | EXPERIMENT 16 71 141 | electrolyte_material 70 142 | power_density 72 143 | conductivity 73 144 | working_temperature 74 145 | LINK experiment_variation 25 22 146 | LINK thickness 42 
43 147 | LINK same_experiment 41 44 148 | LINK experiment_variation 50 46 149 | LINK same_experiment 44 46 150 | LINK experiment_variation 52 50 151 | LINK thickness 57 58 152 | LINK thickness 61 62 153 | LINK same_experiment 66 71 154 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC5457196.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 213 2 | 2 0 214 391 3 | 3 0 392 430 4 | 4 0 431 781 5 | 5 0 782 868 6 | 6 0 869 1107 7 | 7 1 1108 1391 8 | 8 1 1392 1517 9 | 9 1 1518 1818 10 | 10 0 1819 2013 11 | 11 0 2014 2220 12 | 12 0 2221 2400 13 | 13 1 2401 2531 14 | 14 0 2532 2759 15 | 15 1 2760 3018 16 | 16 0 3019 3501 17 | 17 0 3502 3612 18 | 18 0 3613 3787 19 | 19 1 3788 3926 20 | 20 0 3927 4050 21 | 21 0 4051 4160 22 | 22 1 4161 4390 23 | 23 0 4391 4490 24 | 24 0 4491 4654 25 | 25 1 4655 4917 26 | 26 0 4918 5018 27 | 27 0 5019 5233 28 | 28 0 5234 5444 29 | 29 0 5445 5921 30 | 30 0 5922 6199 31 | 31 0 6200 6417 32 | 32 0 6418 6498 33 | 33 0 6499 6628 34 | 34 0 6629 6834 35 | 35 0 6835 6847 36 | 36 0 6848 6922 37 | 37 0 6923 7010 38 | 38 0 7011 7143 39 | 39 0 7144 7225 40 | 40 0 7226 7386 41 | 41 0 7387 7531 42 | 42 0 7532 7742 43 | 43 0 7743 7806 44 | 44 0 7807 7983 45 | 45 0 7984 8021 46 | 46 0 8022 8101 47 | 47 0 8102 8154 48 | 48 0 8155 8243 49 | 49 0 8244 8331 50 | 50 1 8332 8470 51 | 51 1 8471 8585 52 | 52 0 8586 8739 53 | 53 0 8740 8844 54 | 54 0 8845 8919 55 | 55 0 8920 8983 56 | 56 0 8984 9095 57 | 57 0 9096 9271 58 | 58 0 9272 9396 59 | 59 0 9397 9483 60 | 60 0 9484 9574 61 | 61 0 9575 9700 62 | 62 0 9701 9851 63 | 63 1 9852 10062 64 | 64 0 10063 10192 65 | 65 0 10193 10290 66 | 66 0 10291 10363 67 | 67 0 10364 10443 68 | 68 0 10444 10624 69 | 69 0 10625 10731 70 | 70 0 10732 10883 71 | 71 0 10884 10982 72 | 72 0 10983 11196 73 | 73 0 11197 11257 74 | 74 0 11258 11393 75 | 75 0 11394 11453 76 | 76 0 11454 11516 77 | 77 0 11517 11663 78 | 
78 0 11664 11777 79 | 79 0 11778 11898 80 | 80 0 11899 11956 81 | 81 0 11957 12038 82 | 82 0 12039 12258 83 | 83 0 12259 12390 84 | 84 0 12391 12551 85 | 85 0 12552 12679 86 | 86 0 12680 12901 87 | 87 0 12902 13026 88 | 88 0 13027 13191 89 | 89 0 13192 13322 90 | 90 0 13323 13462 91 | 91 0 13463 13657 92 | 92 0 13658 13821 93 | 93 0 13822 13926 94 | 94 0 13927 14064 95 | 95 0 14065 14130 96 | 96 0 14131 14239 97 | 97 0 14240 14489 98 | 98 0 14490 14602 99 | 99 0 14603 14680 100 | 100 0 14681 14739 101 | 101 0 14740 14857 102 | 102 0 14858 14974 103 | 103 0 14975 15125 104 | 104 0 15126 15244 105 | 105 0 15245 15350 106 | 106 0 15351 15544 107 | 107 0 15545 15629 108 | 108 0 15630 15702 109 | 109 0 15703 15814 110 | 110 0 15815 15876 111 | 111 0 15877 15934 112 | 112 0 15935 16072 113 | 113 0 16073 16275 114 | 114 0 16276 16429 115 | 115 0 16430 16538 116 | 116 0 16539 16972 117 | 117 0 16973 17137 118 | 118 0 17138 17200 119 | 119 0 17201 17379 120 | 120 0 17380 17499 121 | 121 0 17500 17594 122 | 122 0 17595 17665 123 | 123 0 17666 17775 124 | 124 1 17776 17914 125 | 125 1 17915 18027 126 | 126 0 18028 18137 127 | 127 1 18138 18304 128 | 128 0 18305 18371 129 | 129 0 18372 18418 130 | 130 0 18419 18443 131 | 131 0 18444 18523 132 | 132 1 18524 18643 133 | 133 1 18644 18739 134 | 134 0 18740 18848 135 | 135 0 18849 19016 136 | 136 0 19017 19126 137 | 137 0 19127 19327 138 | 138 0 19328 19554 139 | 139 0 19555 19852 140 | 140 0 19853 20012 141 | 141 0 20013 20383 142 | 142 0 20384 20522 143 | 143 0 20523 20589 144 | 144 0 20590 20858 145 | 145 0 20859 20941 146 | 146 0 20942 21128 147 | 147 0 21129 21247 148 | 148 0 21248 21454 149 | 149 0 21455 21588 150 | 150 0 21589 21739 151 | 151 0 21740 21874 152 | 152 0 21875 22617 153 | 153 0 22618 22693 154 | 154 0 22694 22964 155 | 155 0 22965 23167 156 | 156 0 23168 23494 157 | 157 0 23495 23582 158 | 158 0 23583 23777 159 | 159 0 23778 23936 160 | 160 0 23937 24093 161 | 161 0 24094 24192 162 | 162 0 24193 24235 163 | 
163 0 24236 24362 164 | 164 0 24363 24490 165 | 165 0 24491 24825 166 | 166 0 24826 24895 167 | 167 0 24896 24957 168 | 168 0 24958 25212 169 | 169 0 25213 25292 170 | 170 0 25293 25381 171 | 171 0 25382 25450 172 | 172 0 25451 25551 173 | 173 0 25552 25623 174 | 174 0 25624 25961 175 | 175 0 25962 26058 176 | 176 0 26059 26097 177 | 177 0 26098 26245 178 | 178 0 26246 26337 179 | 179 0 26338 26428 180 | 180 0 26429 26666 181 | 181 0 26667 26748 182 | 182 0 26749 27085 183 | 183 0 27086 27144 184 | 184 0 27145 27281 185 | 185 0 27282 27356 186 | 186 0 27357 27438 187 | 187 0 27439 27528 188 | 188 0 27529 27670 189 | 189 0 27671 27841 190 | 190 0 27842 28078 191 | 191 0 28079 28247 192 | 192 0 28248 28375 193 | 193 0 28376 28565 194 | 194 0 28566 28663 195 | 195 0 28664 28833 196 | 196 0 28834 28997 197 | 197 0 28998 29215 198 | 198 0 29216 29335 199 | 199 0 29336 29509 200 | 200 0 29510 29653 201 | 201 0 29654 29770 202 | 202 0 29771 29952 203 | 203 0 29953 30235 204 | 204 0 30236 30458 205 | 205 0 30459 30719 206 | 206 0 30720 30925 207 | 207 0 30926 31086 208 | -------------------------------------------------------------------------------- /code/source/utils.py: -------------------------------------------------------------------------------- 1 | """ Neural models for information extraction tasks related to the SOFC-Exp corpus (ACL 2020). 2 | Copyright (c) 2020 Robert Bosch GmbH 3 | @author: Heike Adel 4 | @author: Annemarie Friedrich 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU Affero General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU Affero General Public License for more details. 
import numpy as np


def _percentage(numerator, denominator, if_undefined=0.0):
    """
    :param numerator: count in the numerator of the ratio
    :param denominator: count in the denominator of the ratio
    :param if_undefined: value to return when the denominator is 0
    :return: numerator / denominator on a 0-100 scale, as a Python float
    """
    if denominator == 0:
        return float(if_undefined)
    return float(numerator) / float(denominator) * 100


def _harmonic_mean(a, b):
    """
    :param a: first non-negative score
    :param b: second non-negative score
    :return: harmonic mean of a and b; defined as 0 if both are 0
    """
    if a + b == 0:
        return 0.0
    return 2 * a * b / (a + b)


def compute_eval_scores(conf_matrix, labels):
    """
    Compute accuracy, macro- and micro-averaged and class-wise precision / recall / F1 from a confusion matrix.
    All scores are percentages (0-100).

    :param conf_matrix: num_classes x num_classes confusion matrix, first dimension is the gold standard label,
                        second dimension is the predicted label
    :param labels: list of labels, indices correspond to the indices in the confusion matrix;
                   the label at index 0 is treated as the negative class and excluded from the micro-averages
    :return: accuracy, macro p, macro r, macro f1, micro p, micro r, micro f1,
             dictionaries (keyed by label) with p, r, f1 by class.
             If labels is empty, all averages are 0 and the dictionaries are empty.
    """
    print("labels", labels)
    num_classes = len(labels)
    conf = np.array(conf_matrix)

    p = {}  # precision by class
    r = {}  # recall by class
    f1 = {}  # F1 by class

    micro_tp = 0
    micro_total_gold = 0
    micro_total_pred = 0
    diagonal_total = 0  # correctly classified instances over all classes
    instance_total = 0  # all instances (sum over gold rows)

    for i, cat in enumerate(labels):
        correct = conf[i, i]
        total_gold = conf[i].sum()
        total_pred = conf[:, i].sum()
        diagonal_total += correct
        instance_total += total_gold
        if i > 0:  # do not include negative class
            micro_tp += correct
            micro_total_gold += total_gold
            micro_total_pred += total_pred
        # Define for now: if not predicted this class at all, P=1, i.e. 100 on the
        # percentage scale used for all other scores (a literal 1 would mean 1%).
        p[cat] = _percentage(correct, total_pred, if_undefined=100)
        r[cat] = _percentage(correct, total_gold)
        # Define for now: if P=0 and R=0 then F1 = 0
        f1[cat] = _harmonic_mean(p[cat], r[cat])

    # micro-averages (undefined ratios count as 0)
    micro_p = _percentage(micro_tp, micro_total_pred)
    micro_r = _percentage(micro_tp, micro_total_gold)
    micro_f1 = _harmonic_mean(micro_p, micro_r)

    # macro-averages (each class weighted equally); 0 when there are no classes
    if num_classes == 0:
        macro_p = 0.0
        macro_r = 0.0
    else:
        macro_p = sum(p.values()) / num_classes
        macro_r = sum(r.values()) / num_classes
    # macro-avg. F1 is the harmonic mean of macro-p and macro-r (0 if both are 0)
    macro_f1 = _harmonic_mean(macro_p, macro_r)

    # accuracy; 0 when the confusion matrix contains no instances
    accuracy = _percentage(diagonal_total, instance_total)

    return accuracy, macro_p, macro_r, macro_f1, micro_p, micro_r, micro_f1, p, r, f1


def print_results_classification(conf_matrix, dataset_name, num_labels):
    """
    Print micro F1, class-wise and macro-averaged P / R / F1 and the confusion matrix itself.
    Classes are labelled by their integer index 0..num_labels-1.

    :param conf_matrix: confusion matrix with predictions vs. true labels
    :param dataset_name: name of dataset (for printing only)
    :param num_labels: number of labels
    :return: dictionaries (keyed by class index) with class-wise p, r, f
    """
    results = compute_eval_scores(conf_matrix, list(range(num_labels)))
    # accuracy and micro p / r are computed but not reported here
    (_acc, macro_p, macro_r, macro_f, _micro_p, _micro_r, micro_f1,
     classwise_p, classwise_r, classwise_f) = results
    print(dataset_name + ' F1: {:.1f}'.format(micro_f1))
    print("class-wise results:")
    for cl in range(num_labels):
        print("{0: <11} {1:7.1f} {2:7.1f} {3:7.1f}".format(
            cl, classwise_p[cl], classwise_r[cl], classwise_f[cl]))
    print("macro-avg:", "{:7.1f}".format(macro_p), "{:7.1f}".format(macro_r), "{:7.1f}".format(macro_f))
    for row in conf_matrix:
        print("\t", row)
    return classwise_p, classwise_r, classwise_f
20 | 20 0 5043 5207 21 | 21 0 5208 5325 22 | 22 0 5326 5587 23 | 23 0 5588 6121 24 | 24 0 6122 6335 25 | 25 0 6336 6998 26 | 26 0 6999 7191 27 | 27 0 7192 7364 28 | 28 0 7365 7561 29 | 29 0 7562 7934 30 | 30 0 7935 8640 31 | 31 0 8641 8814 32 | 32 0 8815 9048 33 | 33 0 9049 9279 34 | 34 0 9280 9751 35 | 35 0 9752 10600 36 | 36 0 10601 11252 37 | 37 0 11253 11620 38 | 38 0 11621 11796 39 | 39 0 11797 11862 40 | 40 0 11863 12048 41 | 41 0 12049 12089 42 | 42 0 12090 12134 43 | 43 0 12135 12455 44 | 44 0 12456 12562 45 | 45 0 12563 14283 46 | 46 0 14284 14501 47 | 47 0 14502 14866 48 | 48 0 14867 15242 49 | 49 0 15243 15429 50 | 50 0 15430 15608 51 | 51 0 15609 15669 52 | 52 0 15670 16401 53 | 53 0 16402 16464 54 | 54 0 16465 16507 55 | 55 0 16508 16642 56 | 56 0 16643 17016 57 | 57 0 17017 17159 58 | 58 0 17160 17329 59 | 59 0 17330 17433 60 | 60 0 17434 17585 61 | 61 0 17586 17711 62 | 62 0 17712 17814 63 | 63 0 17815 17854 64 | 64 0 17855 17898 65 | 65 0 17899 18135 66 | 66 0 18136 18535 67 | 67 0 18536 18626 68 | 68 0 18627 18802 69 | 69 0 18803 19041 70 | 70 0 19042 19426 71 | 71 0 19427 19949 72 | 72 0 19950 20417 73 | 73 0 20418 21396 74 | 74 0 21397 21611 75 | 75 0 21612 21936 76 | 76 0 21937 22224 77 | 77 0 22225 22798 78 | 78 0 22799 22906 79 | 79 0 22907 23187 80 | 80 0 23188 23329 81 | 81 0 23330 23737 82 | 82 0 23738 24794 83 | 83 0 24795 25577 84 | 84 0 25578 26130 85 | 85 0 26131 26379 86 | 86 0 26380 26910 87 | 87 0 26911 26978 88 | 88 0 26979 27033 89 | 89 0 27034 27073 90 | 90 0 27074 27118 91 | 91 0 27119 27446 92 | 92 0 27447 27588 93 | 93 0 27589 27759 94 | 94 0 27760 28176 95 | 95 0 28177 28340 96 | 96 0 28341 28493 97 | 97 0 28494 28664 98 | 98 0 28665 28851 99 | 99 0 28852 29094 100 | 100 0 29095 29283 101 | 101 0 29284 29435 102 | 102 0 29436 29605 103 | 103 0 29606 29848 104 | 104 0 29849 29924 105 | 105 0 29925 30337 106 | 106 0 30338 30530 107 | 107 0 30531 30829 108 | 108 0 30830 30922 109 | 109 0 30923 31043 110 | 110 0 31044 31259 111 | 
111 0 31260 31452 112 | 112 0 31453 31632 113 | 113 0 31633 31819 114 | 114 0 31820 32041 115 | 115 0 32042 32197 116 | 116 0 32198 32428 117 | 117 0 32429 32607 118 | 118 0 32608 33140 119 | 119 0 33141 33515 120 | 120 0 33516 34032 121 | 121 0 34033 34210 122 | 122 0 34211 34387 123 | 123 0 34388 34526 124 | 124 0 34527 34920 125 | 125 0 34921 35257 126 | 126 0 35258 35598 127 | 127 0 35599 36870 128 | 128 0 36871 37086 129 | 129 0 37087 37532 130 | 130 0 37533 38394 131 | 131 0 38395 39134 132 | 132 0 39135 39247 133 | 133 0 39248 39611 134 | 134 0 39612 39685 135 | 135 0 39686 40016 136 | 136 1 40017 40395 137 | 137 0 40396 40565 138 | 138 1 40566 40945 139 | 139 0 40946 41052 140 | 140 0 41053 41397 141 | 141 0 41398 41552 142 | 142 1 41553 41886 143 | 143 0 41887 41993 144 | 144 1 41994 42617 145 | 145 0 42618 43028 146 | 146 0 43029 43277 147 | 147 0 43278 43461 148 | 148 0 43462 43554 149 | 149 1 43555 44131 150 | 150 1 44132 44754 151 | 151 0 44755 44796 152 | 152 0 44797 44841 153 | 153 1 44842 45564 154 | 154 0 45565 45750 155 | 155 0 45751 46207 156 | 156 0 46208 46498 157 | 157 0 46499 46607 158 | 158 0 46608 46793 159 | 159 0 46794 47250 160 | 160 0 47251 49943 161 | 161 0 49944 50153 162 | 162 0 50154 50284 163 | 163 0 50285 50927 164 | 164 0 50928 51274 165 | 165 0 51275 51733 166 | 166 0 51734 52300 167 | 167 0 52301 52389 168 | 168 0 52390 52663 169 | 169 0 52664 52844 170 | 170 0 52845 53015 171 | 171 0 53016 53070 172 | 172 0 53071 53904 173 | 173 0 53905 54136 174 | 174 0 54137 54678 175 | 175 0 54679 55223 176 | 176 0 55224 55540 177 | 177 0 55541 56761 178 | 178 0 56762 56893 179 | 179 0 56894 57028 180 | 180 0 57029 57212 181 | 181 0 57213 57363 182 | 182 0 57364 58086 183 | 183 0 58087 58229 184 | 184 0 58230 58431 185 | 185 0 58432 58796 186 | 186 0 58797 59130 187 | 187 0 59131 59306 188 | 188 0 59307 59742 189 | 189 0 59743 59937 190 | 190 0 59938 60250 191 | 191 0 60251 60410 192 | 192 0 60411 60738 193 | 193 0 60739 60952 194 | 194 0 
60953 61016 195 | 195 0 61017 61195 196 | 196 0 61196 61381 197 | 197 0 61382 61594 198 | 198 0 61595 62089 199 | 199 0 62090 62509 200 | 200 0 62510 62651 201 | 201 0 62652 62757 202 | 202 0 62758 62993 203 | 203 0 62994 63490 204 | 204 0 63491 63692 205 | 205 0 63693 64103 206 | 206 0 64104 64109 207 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC6523084.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 653 2 | 2 0 654 816 3 | 3 0 817 1004 4 | 4 0 1005 1213 5 | 5 0 1214 1331 6 | 6 1 1332 1576 7 | 7 0 1577 1726 8 | 8 0 1727 2033 9 | 9 0 2034 2248 10 | 10 0 2249 2353 11 | 11 0 2354 2470 12 | 12 0 2471 2567 13 | 13 0 2568 2723 14 | 14 0 2724 2803 15 | 15 0 2804 2921 16 | 16 0 2922 2997 17 | 17 0 2998 3208 18 | 18 0 3209 3380 19 | 19 0 3381 3660 20 | 20 0 3661 3843 21 | 21 0 3844 4041 22 | 22 0 4042 4114 23 | 23 0 4115 4462 24 | 24 0 4463 4684 25 | 25 0 4685 4823 26 | 26 0 4824 5073 27 | 27 0 5074 5262 28 | 28 0 5263 5491 29 | 29 0 5492 5741 30 | 30 0 5742 6032 31 | 31 0 6033 6145 32 | 32 0 6146 6308 33 | 33 0 6309 6551 34 | 34 0 6552 6711 35 | 35 0 6712 6898 36 | 36 0 6899 7097 37 | 37 0 7098 7213 38 | 38 0 7214 7318 39 | 39 0 7319 7496 40 | 40 0 7497 7656 41 | 41 0 7657 8027 42 | 42 0 8028 8091 43 | 43 0 8092 8186 44 | 44 0 8187 8493 45 | 45 1 8494 8700 46 | 46 1 8701 8958 47 | 47 1 8959 9079 48 | 48 0 9080 9269 49 | 49 0 9270 9401 50 | 50 0 9402 9518 51 | 51 0 9519 9715 52 | 52 0 9716 9860 53 | 53 0 9861 9960 54 | 54 0 9961 10054 55 | 55 0 10055 10254 56 | 56 0 10255 10465 57 | 57 0 10466 10695 58 | 58 0 10696 11026 59 | 59 0 11027 11178 60 | 60 0 11179 11325 61 | 61 0 11326 11517 62 | 62 0 11518 11751 63 | 63 0 11752 11886 64 | 64 0 11887 12014 65 | 65 0 12015 12180 66 | 66 0 12181 12401 67 | 67 0 12402 12508 68 | 68 0 12509 12648 69 | 69 0 12649 12736 70 | 70 0 12737 13032 71 | 71 0 13033 13113 72 | 72 0 13114 13262 73 | 73 0 13263 13807 74 
| 74 0 13808 14037 75 | 75 0 14038 14323 76 | 76 0 14324 14575 77 | 77 0 14576 14722 78 | 78 0 14723 14858 79 | 79 0 14859 14986 80 | 80 0 14987 15117 81 | 81 0 15118 15243 82 | 82 0 15244 15414 83 | 83 0 15415 15471 84 | 84 0 15472 15606 85 | 85 0 15607 15762 86 | 86 0 15763 15944 87 | 87 0 15945 16124 88 | 88 0 16125 16303 89 | 89 0 16304 16407 90 | 90 0 16408 16536 91 | 91 0 16537 16604 92 | 92 0 16605 16752 93 | 93 0 16753 16872 94 | 94 0 16873 17008 95 | 95 0 17009 17089 96 | 96 0 17090 17191 97 | 97 0 17192 17287 98 | 98 0 17288 17425 99 | 99 0 17426 17630 100 | 100 0 17631 17724 101 | 101 0 17725 17882 102 | 102 0 17883 18021 103 | 103 0 18022 18121 104 | 104 0 18122 18343 105 | 105 0 18344 18467 106 | 106 0 18468 18589 107 | 107 0 18590 18685 108 | 108 0 18686 18803 109 | 109 0 18804 18935 110 | 110 0 18936 19100 111 | 111 0 19101 19281 112 | 112 0 19282 19574 113 | 113 0 19575 19722 114 | 114 0 19723 19835 115 | 115 0 19836 20099 116 | 116 0 20100 20303 117 | 117 0 20304 20386 118 | 118 0 20387 20539 119 | 119 0 20540 20607 120 | 120 1 20608 20711 121 | 121 0 20712 20815 122 | 122 0 20816 20922 123 | 123 0 20923 21139 124 | 124 0 21140 21255 125 | 125 0 21256 21318 126 | 126 0 21319 21438 127 | 127 0 21439 21512 128 | 128 0 21513 21673 129 | 129 0 21674 21790 130 | 130 0 21791 22029 131 | 131 0 22030 22156 132 | 132 0 22157 22338 133 | 133 0 22339 22399 134 | 134 0 22400 22537 135 | 135 0 22538 22683 136 | 136 0 22684 22712 137 | 137 0 22713 22901 138 | 138 0 22902 22976 139 | 139 0 22977 23173 140 | 140 0 23174 23250 141 | 141 0 23251 23372 142 | 142 0 23373 23522 143 | 143 0 23523 23674 144 | 144 0 23675 23769 145 | 145 0 23770 23873 146 | 146 0 23874 23942 147 | 147 0 23943 24195 148 | 148 0 24196 24305 149 | 149 0 24306 24358 150 | 150 0 24359 24495 151 | 151 0 24496 24672 152 | 152 0 24673 24868 153 | 153 0 24869 25063 154 | 154 0 25064 25355 155 | 155 0 25356 25587 156 | 156 0 25588 25700 157 | 157 0 25701 25869 158 | 158 0 25870 26064 159 | 159 0 
26065 26236 160 | 160 0 26237 26281 161 | 161 0 26282 26381 162 | 162 0 26382 26689 163 | 163 1 26690 26759 164 | 164 0 26760 26993 165 | 165 0 26994 27044 166 | 166 0 27045 27246 167 | 167 0 27247 27395 168 | 168 0 27396 27541 169 | 169 0 27542 27683 170 | 170 0 27684 27899 171 | 171 0 27900 28110 172 | 172 0 28111 28404 173 | 173 0 28405 28504 174 | 174 0 28505 28684 175 | 175 0 28685 28882 176 | 176 0 28883 28966 177 | 177 0 28967 29050 178 | 178 0 29051 29176 179 | 179 0 29177 29296 180 | 180 0 29297 29347 181 | 181 0 29348 29451 182 | 182 0 29452 29621 183 | 183 1 29622 29727 184 | 184 1 29728 29812 185 | 185 1 29813 30056 186 | 186 1 30057 30245 187 | 187 0 30246 30402 188 | 188 0 30403 30530 189 | 189 0 30531 30602 190 | 190 0 30603 30829 191 | 191 1 30830 31003 192 | 192 0 31004 31147 193 | 193 0 31148 31348 194 | 194 0 31349 31585 195 | 195 0 31586 31739 196 | 196 0 31740 31871 197 | 197 0 31872 32107 198 | 198 0 32108 32259 199 | 199 0 32260 32423 200 | 200 0 32424 32686 201 | 201 0 32687 32862 202 | 202 0 32863 32964 203 | 203 0 32965 33131 204 | 204 0 33132 33339 205 | 205 0 33340 33464 206 | 206 0 33465 33594 207 | 207 0 33595 33801 208 | 208 0 33802 34026 209 | 209 0 34027 34182 210 | 210 0 34183 34381 211 | 211 0 34382 34545 212 | 212 0 34546 34828 213 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC4222441.csv: -------------------------------------------------------------------------------- 1 | 1 1 0 739 2 | 2 1 740 847 3 | 3 0 848 1008 4 | 4 0 1009 1382 5 | 5 0 1383 1777 6 | 6 0 1778 1856 7 | 7 0 1857 2079 8 | 8 0 2080 2169 9 | 9 1 2170 2248 10 | 10 0 2249 2297 11 | 11 0 2298 2490 12 | 12 0 2491 2664 13 | 13 0 2665 2884 14 | 14 0 2885 3045 15 | 15 0 3046 3330 16 | 16 0 3331 3592 17 | 17 0 3593 3625 18 | 18 0 3626 3802 19 | 19 0 3803 3963 20 | 20 1 3964 4198 21 | 21 1 4199 4448 22 | 22 0 4449 4656 23 | 23 0 4657 4725 24 | 24 0 4726 4903 25 | 25 0 4904 5045 26 | 26 0 5046 
5135 27 | 27 0 5136 5305 28 | 28 0 5306 5450 29 | 29 0 5451 5583 30 | 30 0 5584 5830 31 | 31 0 5831 5949 32 | 32 0 5950 6098 33 | 33 0 6099 6280 34 | 34 0 6281 6717 35 | 35 0 6718 6839 36 | 36 0 6840 6998 37 | 37 1 6999 7182 38 | 38 0 7183 7437 39 | 39 1 7438 7585 40 | 40 0 7586 7667 41 | 41 0 7668 7771 42 | 42 0 7772 7829 43 | 43 0 7830 7977 44 | 44 0 7978 8055 45 | 45 0 8056 8508 46 | 46 0 8509 8607 47 | 47 0 8608 8703 48 | 48 0 8704 8738 49 | 49 0 8739 8868 50 | 50 0 8869 9020 51 | 51 0 9021 9135 52 | 52 1 9136 9361 53 | 53 0 9362 9539 54 | 54 0 9540 9574 55 | 55 0 9575 9799 56 | 56 0 9800 10027 57 | 57 0 10028 10160 58 | 58 0 10161 10275 59 | 59 0 10276 10315 60 | 60 0 10316 10589 61 | 61 0 10590 10669 62 | 62 0 10670 10887 63 | 63 0 10888 11009 64 | 64 0 11010 11104 65 | 65 0 11105 11206 66 | 66 0 11207 11326 67 | 67 0 11327 11438 68 | 68 0 11439 11527 69 | 69 0 11528 11661 70 | 70 0 11662 11781 71 | 71 0 11782 11841 72 | 72 0 11842 12023 73 | 73 0 12024 12678 74 | 74 0 12679 12770 75 | 75 0 12771 12843 76 | 76 0 12844 12903 77 | 77 0 12904 13059 78 | 78 0 13060 13119 79 | 79 0 13120 13262 80 | 80 0 13263 13402 81 | 81 0 13403 13526 82 | 82 0 13527 13613 83 | 83 0 13614 13675 84 | 84 0 13676 13785 85 | 85 0 13786 13976 86 | 86 0 13977 14097 87 | 87 0 14098 14259 88 | 88 0 14260 14537 89 | 89 0 14538 14629 90 | 90 0 14630 14834 91 | 91 0 14835 14938 92 | 92 0 14939 15093 93 | 93 0 15094 15169 94 | 94 0 15170 15234 95 | 95 0 15235 15352 96 | 96 0 15353 15464 97 | 97 0 15465 15615 98 | 98 0 15616 15800 99 | 99 0 15801 15903 100 | 100 0 15904 16179 101 | 101 0 16180 16332 102 | 102 0 16333 16551 103 | 103 0 16552 16640 104 | 104 0 16641 16775 105 | 105 0 16776 16889 106 | 106 0 16890 16988 107 | 107 0 16989 17130 108 | 108 0 17131 17190 109 | 109 0 17191 17502 110 | 110 0 17503 17579 111 | 111 0 17580 17627 112 | 112 0 17628 17834 113 | 113 0 17835 17944 114 | 114 0 17945 18059 115 | 115 0 18060 18257 116 | 116 0 18258 18409 117 | 117 0 18410 18486 118 | 118 0 
18487 18672 119 | 119 0 18673 18759 120 | 120 0 18760 19213 121 | 121 0 19214 19503 122 | 122 0 19504 19659 123 | 123 0 19660 19888 124 | 124 0 19889 20161 125 | 125 0 20162 20249 126 | 126 0 20250 20578 127 | 127 0 20579 20818 128 | 128 0 20819 20987 129 | 129 0 20988 21142 130 | 130 0 21143 21282 131 | 131 0 21283 21385 132 | 132 0 21386 21457 133 | 133 0 21458 21501 134 | 134 0 21502 21628 135 | 135 0 21629 21772 136 | 136 0 21773 21873 137 | 137 0 21874 22079 138 | 138 0 22080 22333 139 | 139 0 22334 22516 140 | 140 0 22517 22761 141 | 141 0 22762 22938 142 | 142 0 22939 23125 143 | 143 0 23126 23360 144 | 144 0 23361 23538 145 | 145 0 23539 23836 146 | 146 0 23837 24094 147 | 147 0 24095 24246 148 | 148 0 24247 24549 149 | 149 0 24550 24781 150 | 150 0 24782 25011 151 | 151 0 25012 25192 152 | 152 0 25193 25324 153 | 153 0 25325 25521 154 | 154 0 25522 25645 155 | 155 0 25646 25742 156 | 156 0 25743 25940 157 | 157 0 25941 26142 158 | 158 0 26143 26322 159 | 159 0 26323 26466 160 | 160 0 26467 26548 161 | 161 0 26549 26618 162 | 162 0 26619 26816 163 | 163 0 26817 26926 164 | 164 0 26927 27070 165 | 165 1 27071 27209 166 | 166 0 27210 27450 167 | 167 0 27451 27511 168 | 168 0 27512 27668 169 | 169 0 27669 27837 170 | 170 0 27838 28081 171 | 171 0 28082 28219 172 | 172 0 28220 28468 173 | 173 0 28469 28716 174 | 174 0 28717 28858 175 | 175 0 28859 28973 176 | 176 0 28974 29024 177 | 177 0 29025 29129 178 | 178 0 29130 29252 179 | 179 0 29253 29467 180 | 180 0 29468 29707 181 | 181 0 29708 29846 182 | 182 0 29847 29886 183 | 183 0 29887 29998 184 | 184 0 29999 30204 185 | 185 0 30205 30389 186 | 186 0 30390 30540 187 | 187 0 30541 30669 188 | 188 0 30670 30819 189 | 189 0 30820 30993 190 | 190 0 30994 31151 191 | 191 0 31152 31302 192 | 192 0 31303 31624 193 | 193 0 31625 31824 194 | 194 0 31825 31924 195 | 195 0 31925 32189 196 | 196 0 32190 32403 197 | 197 0 32404 32479 198 | 198 0 32480 32519 199 | 199 0 32520 32598 200 | 200 0 32599 32765 201 | 201 0 32766 
32957 202 | 202 0 32958 33142 203 | 203 0 33143 33249 204 | 204 0 33250 33381 205 | 205 0 33382 33551 206 | 206 0 33552 33682 207 | 207 0 33683 33821 208 | 208 0 33822 33914 209 | 209 0 33915 33996 210 | 210 0 33997 34137 211 | 211 0 34138 34352 212 | 212 0 34353 34490 213 | 213 0 34491 34609 214 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC6445146.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 EXPERIMENT:current_exp 8 31 38 2 | SPAN 2 VALUE 8 74 85 3 | SPAN 3 VALUE 8 89 95 4 | SPAN 4 VALUE 9 257 267 5 | SPAN 5 EXPERIMENT:current_exp 11 17 23 6 | SPAN 6 DEVICE 11 43 55 7 | SPAN 7 DEVICE 11 77 98 8 | SPAN 8 DEVICE 12 25 29 9 | SPAN 9 EXPERIMENT:general_info 12 46 53 10 | SPAN 10 MATERIAL 12 91 111 11 | SPAN 11 DEVICE 13 15 42 12 | SPAN 12 DEVICE 13 44 49 13 | SPAN 13 DEVICE 13 66 89 14 | SPAN 14 EXPERIMENT:general_info 13 126 139 15 | SPAN 15 VALUE 13 225 235 16 | SPAN 16 DEVICE 13 279 288 17 | SPAN 17 DEVICE 14 125 129 18 | SPAN 18 DEVICE 16 67 72 19 | SPAN 19 EXPERIMENT:general_info 16 115 117 20 | SPAN 20 DEVICE 16 148 161 21 | SPAN 21 VALUE 16 167 177 22 | SPAN 22 VALUE 16 181 189 23 | SPAN 23 EXPERIMENT:current_exp 28 11 17 24 | SPAN 24 DEVICE 28 58 62 25 | SPAN 25 DEVICE 28 171 175 26 | SPAN 26 EXPERIMENT:current_exp 35 49 53 27 | SPAN 27 MATERIAL 35 77 89 28 | SPAN 28 MATERIAL 35 91 94 29 | SPAN 29 MATERIAL 35 96 122 30 | SPAN 30 MATERIAL 35 124 127 31 | SPAN 31 EXPERIMENT:current_exp 100 68 71 32 | SPAN 32 VALUE 100 72 83 33 | SPAN 33 VALUE 100 87 97 34 | SPAN 34 EXPERIMENT:current_exp 100 166 176 35 | SPAN 35 VALUE 100 181 185 36 | SPAN 36 MATERIAL 100 192 195 37 | SPAN 37 DEVICE 101 61 65 38 | SPAN 38 EXPERIMENT:current_exp 101 66 70 39 | SPAN 39 VALUE 101 71 75 40 | SPAN 40 VALUE 101 77 81 41 | SPAN 41 VALUE 101 83 87 42 | SPAN 42 VALUE 101 92 103 43 | SPAN 43 VALUE 101 107 110 44 | SPAN 44 VALUE 101 112 115 45 | SPAN 45 VALUE 101 
117 120 46 | SPAN 46 VALUE 101 125 131 47 | SPAN 47 DEVICE 101 197 202 48 | SPAN 48 VALUE 101 223 229 49 | SPAN 49 EXPERIMENT:current_exp 101 230 234 50 | SPAN 50 VALUE 101 235 239 51 | SPAN 51 VALUE 101 241 245 52 | SPAN 52 VALUE 101 250 261 53 | SPAN 53 VALUE 106 38 49 54 | SPAN 54 EXPERIMENT:current_exp 106 50 58 55 | SPAN 55 DEVICE 106 137 142 56 | SPAN 56 MATERIAL 106 148 151 57 | SPAN 57 EXPERIMENT:current_exp 107 48 60 58 | SPAN 58 DEVICE 107 104 108 59 | SPAN 59 VALUE 107 112 118 60 | SPAN 60 VALUE 107 123 128 61 | SPAN 61 VALUE 108 87 92 62 | SPAN 62 DEVICE 112 130 135 63 | SPAN 63 EXPERIMENT:current_exp 112 170 173 64 | SPAN 64 VALUE 112 174 177 65 | SPAN 65 VALUE 112 179 183 66 | SPAN 66 VALUE 112 188 198 67 | SPAN 67 DEVICE 120 103 107 68 | SPAN 68 MATERIAL 120 113 116 69 | SPAN 69 EXPERIMENT:current_exp 120 129 135 70 | SPAN 70 VALUE 120 277 286 71 | SPAN 71 VALUE 140 24 30 72 | SPAN 72 MATERIAL 140 36 38 73 | SPAN 73 EXPERIMENT:current_exp 140 51 55 74 | SPAN 74 VALUE 140 56 60 75 | SPAN 75 VALUE 140 62 66 76 | SPAN 76 VALUE 140 71 82 77 | SPAN 77 DEVICE 140 221 225 78 | SPAN 78 VALUE 140 227 238 79 | SPAN 79 EXPERIMENT:current_exp 141 29 38 80 | SPAN 80 VALUE 141 48 53 81 | SPAN 81 DEVICE 141 54 63 82 | SPAN 82 DEVICE 143 21 26 83 | SPAN 83 EXPERIMENT:current_exp 143 32 41 84 | SPAN 84 VALUE 143 113 122 85 | EXPERIMENT 1 1 86 | power_density 2 87 | working_temperature 3 88 | EXPERIMENT 2 5 89 | device 6 90 | device 7 91 | EXPERIMENT 3 9 92 | device 8 93 | electrolyte_material 10 94 | EXPERIMENT 4 14 95 | device 11 96 | working_temperature 15 97 | device 16 98 | EXPERIMENT 5 19 99 | device 18 100 | device 20 101 | power_density 21 102 | power_density 22 103 | EXPERIMENT 6 23 104 | device 24 105 | device 25 106 | EXPERIMENT 7 26 107 | anode_material 27 108 | anode_material 29 109 | EXPERIMENT 8 31 110 | open_circuit_voltage 32 111 | working_temperature 33 112 | EXPERIMENT 9 34 113 | electrolyte_material 36 114 | EXPERIMENT 10 38 115 | device 37 116 | 
power_density 39 117 | power_density 40 118 | power_density 41 119 | power_density 42 120 | working_temperature 43 121 | working_temperature 44 122 | working_temperature 45 123 | working_temperature 46 124 | EXPERIMENT 11 49 125 | device 47 126 | working_temperature 48 127 | power_density 50 128 | power_density 51 129 | power_density 52 130 | EXPERIMENT 12 54 131 | power_density 53 132 | device 55 133 | electrolyte_material 56 134 | EXPERIMENT 13 57 135 | device 58 136 | working_temperature 59 137 | voltage 60 138 | time_of_operation 61 139 | EXPERIMENT 14 63 140 | device 62 141 | resistance 64 142 | resistance 65 143 | resistance 66 144 | EXPERIMENT 15 69 145 | device 67 146 | electrolyte_material 68 147 | power_density 70 148 | EXPERIMENT 16 73 149 | working_temperature 71 150 | fuel_used 72 151 | power_density 74 152 | power_density 75 153 | power_density 76 154 | EXPERIMENT 17 79 155 | time_of_operation 80 156 | device 81 157 | EXPERIMENT 18 83 158 | device 82 159 | power_density 84 160 | LINK same_experiment 31 34 161 | LINK thickness 36 35 162 | LINK same_experiment 34 38 163 | LINK same_experiment 38 49 164 | LINK coreference 11 12 165 | LINK coreference 11 13 166 | LINK coreference 27 28 167 | LINK coreference 29 30 168 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/sentences/PMC3793895.csv: -------------------------------------------------------------------------------- 1 | 1 0 0 234 2 | 2 0 235 264 3 | 3 0 265 353 4 | 4 0 354 370 5 | 5 0 371 888 6 | 6 0 889 1058 7 | 7 0 1059 1267 8 | 8 0 1268 1365 9 | 9 0 1366 1566 10 | 10 0 1567 2413 11 | 11 0 2414 2587 12 | 12 0 2588 3023 13 | 13 1 3024 3467 14 | 14 0 3468 3664 15 | 15 0 3665 3757 16 | 16 0 3758 3927 17 | 17 0 3928 4057 18 | 18 0 4058 4278 19 | 19 0 4279 4391 20 | 20 0 4392 4684 21 | 21 0 4685 4809 22 | 22 0 4810 5073 23 | 23 0 5074 5203 24 | 24 0 5204 5482 25 | 25 0 5483 5614 26 | 26 0 5615 5834 27 | 27 0 5835 6056 28 | 28 0 6057 6287 29 
| 29 0 6288 6460 30 | 30 0 6461 6575 31 | 31 0 6576 6671 32 | 32 0 6672 6800 33 | 33 0 6801 7111 34 | 34 0 7112 7237 35 | 35 0 7238 7327 36 | 36 0 7328 7477 37 | 37 0 7478 7626 38 | 38 0 7627 7684 39 | 39 0 7685 7833 40 | 40 0 7834 7905 41 | 41 0 7906 7968 42 | 42 0 7969 8036 43 | 43 0 8037 8151 44 | 44 0 8152 8299 45 | 45 0 8300 8394 46 | 46 0 8395 8635 47 | 47 0 8636 8919 48 | 48 0 8920 9066 49 | 49 0 9067 9360 50 | 50 0 9361 9494 51 | 51 0 9495 9987 52 | 52 0 9988 10232 53 | 53 0 10233 10459 54 | 54 0 10460 10545 55 | 55 0 10546 10759 56 | 56 0 10760 11033 57 | 57 0 11034 11138 58 | 58 0 11139 11391 59 | 59 0 11392 11799 60 | 60 0 11800 12071 61 | 61 0 12072 12373 62 | 62 0 12374 12495 63 | 63 0 12496 12599 64 | 64 0 12600 12763 65 | 65 0 12764 12884 66 | 66 0 12885 12989 67 | 67 0 12990 13104 68 | 68 0 13105 13226 69 | 69 0 13227 13309 70 | 70 0 13310 13444 71 | 71 0 13445 13570 72 | 72 0 13571 13617 73 | 73 0 13618 13756 74 | 74 0 13757 14087 75 | 75 0 14088 14302 76 | 76 0 14303 14503 77 | 77 0 14504 14903 78 | 78 0 14904 15203 79 | 79 0 15204 15272 80 | 80 0 15273 15501 81 | 81 0 15502 15566 82 | 82 0 15567 15663 83 | 83 0 15664 16992 84 | 84 0 16993 17350 85 | 85 0 17351 17427 86 | 86 0 17428 17521 87 | 87 0 17522 17749 88 | 88 0 17750 17873 89 | 89 0 17874 18100 90 | 90 0 18101 18287 91 | 91 0 18288 18432 92 | 92 0 18433 18467 93 | 93 0 18468 18545 94 | 94 0 18546 18659 95 | 95 0 18660 18888 96 | 96 0 18889 18985 97 | 97 0 18986 19190 98 | 98 0 19191 19460 99 | 99 0 19461 19716 100 | 100 0 19717 19923 101 | 101 0 19924 20045 102 | 102 0 20046 20276 103 | 103 0 20277 20426 104 | 104 0 20427 20536 105 | 105 0 20537 20625 106 | 106 0 20626 20860 107 | 107 0 20861 21201 108 | 108 0 21202 21512 109 | 109 0 21513 21631 110 | 110 0 21632 21942 111 | 111 0 21943 22140 112 | 112 0 22141 22360 113 | 113 0 22361 22737 114 | 114 0 22738 22911 115 | 115 0 22912 23155 116 | 116 0 23156 23290 117 | 117 0 23291 23451 118 | 118 0 23452 23578 119 | 119 0 23579 24113 120 | 
120 0 24114 24338 121 | 121 0 24339 24592 122 | 122 0 24593 24821 123 | 123 1 24822 25225 124 | 124 0 25226 25332 125 | 125 0 25333 25765 126 | 126 0 25766 25986 127 | 127 0 25987 26088 128 | 128 0 26089 26215 129 | 129 0 26216 26491 130 | 130 0 26492 26611 131 | 131 0 26612 26873 132 | 132 0 26874 26902 133 | 133 0 26903 27127 134 | 134 0 27128 27278 135 | 135 0 27279 27447 136 | 136 0 27448 27575 137 | 137 0 27576 27933 138 | 138 0 27934 28076 139 | 139 0 28077 28594 140 | 140 0 28595 28757 141 | 141 0 28758 28932 142 | 142 0 28933 29287 143 | 143 0 29288 29473 144 | 144 0 29474 29670 145 | 145 0 29671 29903 146 | 146 0 29904 30101 147 | 147 0 30102 30195 148 | 148 0 30196 30391 149 | 149 0 30392 30454 150 | 150 0 30455 30488 151 | 151 0 30489 30709 152 | 152 0 30710 30906 153 | 153 0 30907 31048 154 | 154 1 31049 31306 155 | 155 1 31307 31540 156 | 156 1 31541 31858 157 | 157 0 31859 32036 158 | 158 0 32037 32164 159 | 159 0 32165 32306 160 | 160 0 32307 32499 161 | 161 0 32500 32657 162 | 162 0 32658 32719 163 | 163 0 32720 32812 164 | 164 0 32813 32867 165 | 165 0 32868 33216 166 | 166 1 33217 33305 167 | 167 1 33306 33516 168 | 168 0 33517 33614 169 | 169 0 33615 33837 170 | 170 0 33838 34051 171 | 171 0 34052 34407 172 | 172 0 34408 34676 173 | 173 0 34677 34889 174 | 174 0 34890 35050 175 | 175 0 35051 35190 176 | 176 0 35191 35295 177 | 177 0 35296 35438 178 | 178 0 35439 35624 179 | 179 0 35625 36049 180 | 180 0 36050 36121 181 | 181 0 36122 36539 182 | 182 0 36540 36863 183 | 183 0 36864 37009 184 | 184 0 37010 37142 185 | 185 0 37143 37282 186 | 186 0 37283 37821 187 | 187 0 37822 38072 188 | 188 0 38073 38282 189 | 189 0 38283 38567 190 | 190 0 38568 38766 191 | 191 0 38767 39143 192 | 192 0 39144 39260 193 | 193 0 39261 39388 194 | 194 0 39389 39455 195 | 195 0 39456 39819 196 | 196 0 39820 39997 197 | 197 0 39998 40126 198 | 198 0 40127 40273 199 | 199 0 40274 40436 200 | 200 0 40437 40602 201 | 201 0 40603 40990 202 | 202 0 40991 41214 203 | 203 0 
41215 41501 204 | 204 0 41502 41897 205 | 205 0 41898 42147 206 | 206 0 42148 42390 207 | 207 0 42391 42645 208 | 208 0 42646 42888 209 | 209 0 42889 43090 210 | 210 0 43091 43213 211 | 211 0 43214 43438 212 | 212 0 43439 43603 213 | 213 0 43604 43886 214 | 214 0 43887 44071 215 | 215 0 44072 44370 216 | 216 0 44371 44545 217 | 217 0 44546 44857 218 | 218 0 44858 45165 219 | 219 0 45166 45248 220 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC5456866.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 EXPERIMENT:previous_work 2 22 29 2 | SPAN 2 MATERIAL 2 78 92 3 | SPAN 3 EXPERIMENT:current_exp 3 24 30 4 | SPAN 4 EXPERIMENT:current_exp 4 8 20 5 | SPAN 5 MATERIAL 4 109 142 6 | SPAN 6 EXPERIMENT:current_exp 4 161 171 7 | SPAN 7 DEVICE 4 187 191 8 | SPAN 8 DEVICE 6 4 15 9 | SPAN 9 MATERIAL 6 26 44 10 | SPAN 10 MATERIAL 6 70 96 11 | SPAN 11 MATERIAL 6 98 102 12 | SPAN 12 MATERIAL 6 123 141 13 | SPAN 13 EXPERIMENT:current_exp 6 151 155 14 | SPAN 14 VALUE 6 183 186 15 | SPAN 15 VALUE 6 191 202 16 | SPAN 16 VALUE 6 206 212 17 | SPAN 17 MATERIAL 6 238 240 18 | SPAN 18 MATERIAL 6 253 256 19 | SPAN 19 MATERIAL 12 0 26 20 | SPAN 20 EXPERIMENT:general_info 12 49 53 21 | SPAN 21 DEVICE 12 74 81 22 | SPAN 22 MATERIAL 13 0 24 23 | SPAN 23 MATERIAL 13 26 30 24 | SPAN 24 MATERIAL 13 40 56 25 | SPAN 25 MATERIAL 13 58 61 26 | SPAN 26 EXPERIMENT:general_info 13 96 108 27 | SPAN 27 VALUE 13 191 203 28 | SPAN 28 EXPERIMENT:previous_work 21 47 54 29 | SPAN 29 MATERIAL 21 103 117 30 | SPAN 30 EXPERIMENT:current_exp 22 23 31 31 | SPAN 31 EXPERIMENT:current_exp 23 3 15 32 | SPAN 32 DEVICE 23 200 204 33 | SPAN 33 MATERIAL 23 211 213 34 | SPAN 34 MATERIAL 108 108 149 35 | SPAN 35 VALUE 111 35 41 36 | SPAN 36 MATERIAL 111 46 86 37 | SPAN 37 VALUE 111 95 99 38 | SPAN 38 VALUE 111 101 105 39 | SPAN 39 VALUE 111 111 122 40 | SPAN 40 MATERIAL 113 17 33 41 | SPAN 41 
EXPERIMENT:current_exp 113 34 37 42 | SPAN 42 MATERIAL 113 96 104 43 | SPAN 43 MATERIAL 113 114 138 44 | SPAN 44 MATERIAL 113 140 144 45 | SPAN 45 EXPERIMENT:previous_work 113 146 153 46 | SPAN 46 VALUE 113 158 167 47 | SPAN 47 VALUE 113 171 177 48 | SPAN 48 MATERIAL 113 189 205 49 | SPAN 49 MATERIAL 113 207 210 50 | SPAN 50 MATERIAL 114 58 89 51 | SPAN 51 MATERIAL 114 111 115 52 | SPAN 52 MATERIAL 118 35 69 53 | SPAN 53 EXPERIMENT:current_exp 118 95 101 54 | SPAN 54 DEVICE 118 112 117 55 | SPAN 55 MATERIAL 118 124 145 56 | SPAN 56 VALUE 118 168 180 57 | SPAN 57 MATERIAL 118 181 185 58 | SPAN 58 EXPERIMENT:current_exp 119 12 22 59 | SPAN 59 VALUE 119 98 101 60 | SPAN 60 VALUE 119 106 112 61 | SPAN 61 DEVICE 119 126 131 62 | SPAN 62 MATERIAL 119 146 148 63 | SPAN 63 EXPERIMENT:current_exp 119 172 178 64 | SPAN 64 MATERIAL 119 182 185 65 | SPAN 65 EXPERIMENT:current_exp 120 28 37 66 | SPAN 66 DEVICE 120 45 50 67 | SPAN 67 MATERIAL 120 59 76 68 | SPAN 68 MATERIAL 120 81 99 69 | SPAN 69 VALUE 120 113 116 70 | SPAN 70 VALUE 120 121 132 71 | SPAN 71 VALUE 120 136 142 72 | SPAN 72 EXPERIMENT:previous_work 121 44 52 73 | SPAN 73 MATERIAL 121 57 65 74 | SPAN 74 VALUE 121 80 91 75 | SPAN 75 VALUE 121 95 101 76 | SPAN 76 MATERIAL 125 49 67 77 | SPAN 77 MATERIAL 125 107 111 78 | SPAN 78 VALUE 125 152 162 79 | SPAN 79 VALUE 125 166 172 80 | SPAN 80 EXPERIMENT:previous_work 125 177 185 81 | SPAN 81 MATERIAL 126 43 76 82 | SPAN 82 EXPERIMENT:current_exp 126 93 102 83 | SPAN 83 MATERIAL 133 66 70 84 | EXPERIMENT 1 1 85 | cathode_material 2 86 | EXPERIMENT 2 4 87 | cathode_material 5 88 | EXPERIMENT 3 6 89 | device 7 90 | EXPERIMENT 4 13 91 | device 8 92 | cathode_material 9 93 | electrolyte_material 10 94 | anode_material 12 95 | power_density 14 96 | power_density 15 97 | working_temperature 16 98 | fuel_used 17 99 | fuel_used 18 100 | EXPERIMENT 5 20 101 | cathode_material 19 102 | device 21 103 | EXPERIMENT 6 26 104 | cathode_material 22 105 | cathode_material 24 106 | 
working_temperature 27 107 | EXPERIMENT 7 28 108 | cathode_material 29 109 | EXPERIMENT 8 31 110 | device 32 111 | fuel_used 33 112 | EXPERIMENT 9 41 113 | cathode_material 40 114 | cathode_material 42 115 | EXPERIMENT 10 45 116 | cathode_material 43 117 | conductivity 46 118 | working_temperature 47 119 | cathode_material 48 120 | EXPERIMENT 11 53 121 | cathode_material 52 122 | device 54 123 | support_material 55 124 | electrolyte_material 57 125 | EXPERIMENT 12 58 126 | working_temperature 59 127 | working_temperature 60 128 | device 61 129 | fuel_used 62 130 | EXPERIMENT 13 63 131 | fuel_used 64 132 | EXPERIMENT 14 65 133 | device 66 134 | cathode_material 67 135 | cathode_material 68 136 | power_density 69 137 | power_density 70 138 | working_temperature 71 139 | EXPERIMENT 15 72 140 | power_density 74 141 | working_temperature 75 142 | EXPERIMENT 16 80 143 | cathode_material 76 144 | electrolyte_material 77 145 | power_density 78 146 | working_temperature 79 147 | EXPERIMENT 17 82 148 | cathode_material 81 149 | LINK experiment_variation 3 1 150 | LINK same_experiment 3 4 151 | LINK same_experiment 4 6 152 | LINK experiment_variation 26 20 153 | LINK experiment_variation 30 28 154 | LINK same_experiment 30 31 155 | LINK same_experiment 41 45 156 | LINK thickness 57 56 157 | LINK same_experiment 53 58 158 | LINK same_experiment 58 63 159 | LINK same_experiment 63 65 160 | LINK coreference 5 9 161 | LINK coreference 10 11 162 | LINK coreference 22 23 163 | LINK coreference 24 25 164 | LINK coreference 43 44 165 | LINK coreference 48 49 166 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC5457052.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 EXPERIMENT:current_exp 1 1092 1098 2 | SPAN 2 MATERIAL 1 1144 1159 3 | SPAN 3 DEVICE 1 1160 1181 4 | SPAN 4 EXPERIMENT:current_exp 2 32 41 5 | SPAN 5 EXPERIMENT:current_exp 2 80 86 6 | SPAN 6 VALUE 2 
106 113 7 | SPAN 7 VALUE 2 117 123 8 | SPAN 8 MATERIAL 26 165 190 9 | SPAN 9 MATERIAL 26 192 196 10 | SPAN 10 MATERIAL 27 88 92 11 | SPAN 11 EXPERIMENT:current_exp 27 122 130 12 | SPAN 12 DEVICE 27 136 147 13 | SPAN 13 EXPERIMENT:current_exp 27 188 190 14 | SPAN 14 DEVICE 27 218 223 15 | SPAN 15 VALUE 27 225 234 16 | SPAN 16 VALUE 27 238 243 17 | SPAN 17 VALUE 27 248 254 18 | SPAN 18 MATERIAL 28 87 93 19 | SPAN 19 MATERIAL 28 94 100 20 | SPAN 20 MATERIAL 28 111 118 21 | SPAN 21 VALUE 28 141 147 22 | SPAN 22 VALUE 28 182 199 23 | SPAN 23 VALUE 28 203 208 24 | SPAN 24 EXPERIMENT:previous_work 28 214 222 25 | SPAN 25 EXPERIMENT:current_exp 31 22 25 26 | SPAN 26 MATERIAL 31 40 48 27 | SPAN 27 VALUE 31 97 103 28 | SPAN 28 VALUE 31 105 114 29 | SPAN 29 EXPERIMENT:current_exp 33 23 27 30 | SPAN 30 VALUE 33 76 86 31 | SPAN 31 VALUE 33 90 95 32 | SPAN 32 VALUE 33 100 106 33 | SPAN 33 DEVICE 40 27 33 34 | SPAN 34 EXPERIMENT:current_exp 40 34 43 35 | SPAN 35 VALUE 40 59 64 36 | SPAN 36 DEVICE 40 167 173 37 | SPAN 37 EXPERIMENT:current_exp 40 174 179 38 | SPAN 38 VALUE 40 209 217 39 | SPAN 39 VALUE 40 221 225 40 | SPAN 40 VALUE 40 231 236 41 | SPAN 41 EXPERIMENT:current_exp 40 244 251 42 | SPAN 42 VALUE 40 255 260 43 | SPAN 43 VALUE 40 266 272 44 | SPAN 44 DEVICE 41 97 102 45 | SPAN 45 EXPERIMENT:current_exp 41 103 107 46 | SPAN 46 VALUE 41 119 126 47 | SPAN 47 VALUE 41 140 147 48 | SPAN 48 VALUE 41 160 166 49 | SPAN 49 VALUE 49 65 74 50 | SPAN 50 EXPERIMENT:current_exp 49 79 87 51 | SPAN 51 VALUE 57 44 48 52 | SPAN 52 DEVICE 57 51 57 53 | SPAN 53 EXPERIMENT:current_exp 57 58 62 54 | SPAN 54 VALUE 57 63 68 55 | SPAN 55 VALUE 57 72 77 56 | SPAN 56 DEVICE 57 106 112 57 | SPAN 57 EXPERIMENT:current_exp 57 113 121 58 | SPAN 58 VALUE 57 125 130 59 | SPAN 59 VALUE 57 132 137 60 | SPAN 60 MATERIAL 119 14 29 61 | SPAN 61 DEVICE 119 30 34 62 | SPAN 62 MATERIAL 119 53 58 63 | SPAN 63 MATERIAL 119 91 98 64 | SPAN 64 MATERIAL 119 122 126 65 | SPAN 65 EXPERIMENT:current_exp 119 140 148 66 
| SPAN 66 DEVICE 121 40 45 67 | SPAN 67 MATERIAL 121 54 58 68 | SPAN 68 EXPERIMENT:current_exp 121 85 93 69 | SPAN 69 VALUE 121 119 126 70 | SPAN 70 VALUE 121 130 136 71 | SPAN 71 VALUE 121 141 150 72 | SPAN 72 MATERIAL 122 24 27 73 | SPAN 73 DEVICE 122 56 60 74 | SPAN 74 EXPERIMENT:current_exp 122 70 78 75 | SPAN 75 VALUE 122 82 91 76 | SPAN 76 VALUE 122 106 111 77 | SPAN 77 VALUE 122 115 121 78 | SPAN 78 MATERIAL 124 20 23 79 | SPAN 79 EXPERIMENT:current_exp 124 68 76 80 | SPAN 80 VALUE 124 80 89 81 | SPAN 81 VALUE 124 94 98 82 | EXPERIMENT 1 1 83 | support_material 2 84 | device 3 85 | EXPERIMENT 2 5 86 | power_density 6 87 | working_temperature 7 88 | EXPERIMENT 3 11 89 | cathode_material 10 90 | device 12 91 | EXPERIMENT 4 13 92 | device 14 93 | current_density 15 94 | voltage 16 95 | working_temperature 17 96 | EXPERIMENT 5 24 97 | fuel_used 18 98 | fuel_used 19 99 | fuel_used 20 100 | working_temperature 21 101 | power_density 22 102 | voltage 23 103 | EXPERIMENT 6 25 104 | fuel_used 26 105 | working_temperature 27 106 | current_density 28 107 | EXPERIMENT 7 29 108 | current_density 30 109 | voltage 31 110 | working_temperature 32 111 | EXPERIMENT 8 34 112 | device 33 113 | time_of_operation 35 114 | EXPERIMENT 9 37 115 | device 36 116 | degradation_rate 38 117 | degradation_rate 39 118 | time_of_operation 40 119 | EXPERIMENT 10 41 120 | degradation_rate 42 121 | time_of_operation 43 122 | EXPERIMENT 11 45 123 | device 44 124 | open_circuit_voltage 46 125 | open_circuit_voltage 47 126 | working_temperature 48 127 | EXPERIMENT 12 50 128 | current_density 49 129 | EXPERIMENT 13 53 130 | device 52 131 | degradation_rate 54 132 | degradation_rate 55 133 | EXPERIMENT 14 57 134 | device 56 135 | degradation_rate 58 136 | degradation_rate 59 137 | EXPERIMENT 15 65 138 | support_material 60 139 | device 61 140 | support_material 62 141 | anode_material 63 142 | electrolyte_material 64 143 | EXPERIMENT 16 68 144 | device 66 145 | cathode_material 67 146 | 
power_density 69 147 | working_temperature 70 148 | current_density 71 149 | EXPERIMENT 17 74 150 | support_material 72 151 | device 73 152 | current_density 75 153 | time_of_operation 76 154 | working_temperature 77 155 | EXPERIMENT 18 79 156 | support_material 78 157 | current_density 80 158 | time_of_operation 81 159 | LINK same_experiment 1 4 160 | LINK same_experiment 4 5 161 | LINK same_experiment 11 13 162 | LINK same_experiment 25 29 163 | LINK experiment_variation 37 34 164 | LINK same_experiment 37 41 165 | LINK thickness 53 51 166 | LINK experiment_variation 57 53 167 | LINK same_experiment 65 68 168 | LINK same_experiment 65 68 169 | LINK experiment_variation 79 74 170 | LINK same_experiment 68 74 171 | LINK coreference 8 9 172 | -------------------------------------------------------------------------------- /sofc-exp-corpus/annotations/frames/PMC6073263.csv: -------------------------------------------------------------------------------- 1 | SPAN 1 MATERIAL 2 105 120 2 | SPAN 2 MATERIAL 2 121 125 3 | SPAN 3 MATERIAL 5 123 133 4 | SPAN 4 EXPERIMENT:current_exp 5 168 180 5 | SPAN 5 VALUE 5 184 194 6 | SPAN 6 EXPERIMENT:current_exp 6 39 48 7 | SPAN 7 MATERIAL 6 58 72 8 | SPAN 8 VALUE 6 74 80 9 | SPAN 9 EXPERIMENT:current_exp 7 40 51 10 | SPAN 10 MATERIAL 7 61 75 11 | SPAN 11 VALUE 7 77 83 12 | SPAN 12 MATERIAL 8 13 17 13 | SPAN 13 EXPERIMENT:current_exp 8 136 140 14 | SPAN 14 DEVICE 8 163 185 15 | SPAN 15 DEVICE 8 187 192 16 | SPAN 16 MATERIAL 9 62 82 17 | SPAN 17 MATERIAL 9 84 87 18 | SPAN 18 EXPERIMENT:general_info 9 98 102 19 | SPAN 19 DEVICE 11 9 14 20 | SPAN 20 MATERIAL 11 21 24 21 | SPAN 21 EXPERIMENT:general_info 11 57 60 22 | SPAN 22 VALUE 11 83 94 23 | SPAN 23 EXPERIMENT:previous_work 14 28 33 24 | SPAN 24 MATERIAL 14 70 73 25 | SPAN 25 VALUE 14 143 158 26 | SPAN 26 VALUE 14 162 168 27 | SPAN 27 EXPERIMENT:previous_work 15 13 21 28 | SPAN 28 MATERIAL 15 31 38 29 | SPAN 29 MATERIAL 15 40 60 30 | SPAN 30 EXPERIMENT:previous_work 15 112 115 31 | 
SPAN 31 MATERIAL 15 170 173 32 | SPAN 32 VALUE 15 177 187 33 | SPAN 33 MATERIAL 16 22 30 34 | SPAN 34 MATERIAL 16 32 38 35 | SPAN 35 MATERIAL 16 40 44 36 | SPAN 36 MATERIAL 16 56 61 37 | SPAN 37 EXPERIMENT:general_info 16 83 87 38 | SPAN 38 DEVICE 16 112 121 39 | SPAN 39 EXPERIMENT:current_exp 32 143 149 40 | SPAN 40 VALUE 32 213 223 41 | SPAN 41 EXPERIMENT:current_exp 53 35 43 42 | SPAN 42 MATERIAL 53 52 56 43 | SPAN 43 VALUE 53 68 74 44 | SPAN 44 MATERIAL 53 77 81 45 | SPAN 45 VALUE 53 93 100 46 | SPAN 46 MATERIAL 53 107 111 47 | SPAN 47 VALUE 53 123 130 48 | SPAN 48 VALUE 53 138 155 49 | SPAN 49 VALUE 53 157 174 50 | SPAN 50 VALUE 53 180 197 51 | SPAN 51 VALUE 53 201 207 52 | SPAN 52 MATERIAL 55 37 41 53 | SPAN 53 VALUE 55 53 59 54 | SPAN 54 MATERIAL 55 77 81 55 | SPAN 55 VALUE 55 93 100 56 | SPAN 56 MATERIAL 55 138 142 57 | SPAN 57 VALUE 55 154 161 58 | SPAN 58 MATERIAL 70 43 57 59 | SPAN 59 EXPERIMENT:current_exp 70 133 139 60 | SPAN 60 VALUE 70 143 149 61 | SPAN 61 EXPERIMENT:current_exp 73 40 42 62 | SPAN 62 VALUE 73 43 50 63 | SPAN 63 MATERIAL 76 4 6 64 | SPAN 64 MATERIAL 76 7 9 65 | SPAN 65 DEVICE 76 10 19 66 | SPAN 66 EXPERIMENT:current_exp 76 52 58 67 | SPAN 67 VALUE 76 62 68 68 | SPAN 68 MATERIAL 76 77 91 69 | SPAN 69 VALUE 76 93 99 70 | SPAN 70 EXPERIMENT:current_exp 77 10 14 71 | SPAN 71 MATERIAL 77 24 38 72 | SPAN 72 VALUE 77 40 47 73 | SPAN 73 EXPERIMENT:current_exp 77 48 55 74 | SPAN 74 VALUE 77 85 91 75 | SPAN 75 MATERIAL 78 33 47 76 | SPAN 76 VALUE 78 49 55 77 | SPAN 77 EXPERIMENT:current_exp 78 57 59 78 | SPAN 78 VALUE 78 60 72 79 | SPAN 79 VALUE 78 86 92 80 | SPAN 80 VALUE 78 97 103 81 | SPAN 81 EXPERIMENT:general_info 79 39 47 82 | SPAN 82 MATERIAL 79 48 55 83 | SPAN 83 DEVICE 79 76 85 84 | SPAN 84 VALUE 79 91 106 85 | SPAN 85 VALUE 79 110 120 86 | SPAN 86 MATERIAL 86 33 47 87 | SPAN 87 VALUE 86 49 55 88 | SPAN 88 EXPERIMENT:current_exp 86 57 59 89 | SPAN 89 VALUE 86 60 72 90 | SPAN 90 VALUE 86 86 92 91 | SPAN 91 VALUE 86 97 103 92 | 
EXPERIMENT 1 4 93 | electrolyte_material 3 94 | working_temperature 5 95 | EXPERIMENT 2 6 96 | electrolyte_material 7 97 | working_temperature 8 98 | EXPERIMENT 3 9 99 | electrolyte_material 10 100 | working_temperature 11 101 | EXPERIMENT 4 13 102 | electrolyte_material 12 103 | device 14 104 | EXPERIMENT 5 18 105 | electrolyte_material 16 106 | EXPERIMENT 6 21 107 | device 19 108 | electrolyte_material 20 109 | working_temperature 22 110 | EXPERIMENT 7 23 111 | electrolyte_material 24 112 | power_density 25 113 | working_temperature 26 114 | EXPERIMENT 8 27 115 | electrolyte_material 28 116 | working_temperature 32 117 | EXPERIMENT 9 37 118 | device 38 119 | EXPERIMENT 10 39 120 | working_temperature 40 121 | EXPERIMENT 11 41 122 | electrolyte_material 42 123 | electrolyte_material 44 124 | electrolyte_material 46 125 | conductivity 48 126 | conductivity 49 127 | conductivity 50 128 | working_temperature 51 129 | EXPERIMENT 12 59 130 | electrolyte_material 58 131 | working_temperature 60 132 | EXPERIMENT 13 61 133 | open_circuit_voltage 62 134 | EXPERIMENT 14 66 135 | fuel_used 63 136 | fuel_used 64 137 | device 65 138 | working_temperature 67 139 | electrolyte_material 68 140 | EXPERIMENT 15 70 141 | electrolyte_material 71 142 | EXPERIMENT 16 73 143 | open_circuit_voltage 74 144 | EXPERIMENT 17 77 145 | electrolyte_material 75 146 | power_density 78 147 | working_temperature 80 148 | EXPERIMENT 18 81 149 | support_material 82 150 | device 83 151 | power_density 84 152 | working_temperature 85 153 | EXPERIMENT 19 88 154 | electrolyte_material 86 155 | power_density 89 156 | working_temperature 91 157 | LINK same_experiment 4 6 158 | LINK same_experiment 6 9 159 | LINK same_experiment 27 30 160 | LINK same_experiment 59 61 161 | LINK same_experiment 66 70 162 | LINK same_experiment 70 73 163 | LINK same_experiment 73 77 164 | LINK thickness 75 79 165 | LINK thickness 86 90 166 | LINK coreference 1 2 167 | LINK coreference 14 15 168 | LINK coreference 16 17 169 | 
--------------------------------------------------------------------------------